Update LLVM to r108428.

author: rdivacky <rdivacky@FreeBSD.org> 2010-07-15 17:06:11 +0000
committer: rdivacky <rdivacky@FreeBSD.org> 2010-07-15 17:06:11 +0000
commit: c1c3262b63b1d5fbba6a7ad188f4e47d92c7840e (patch)
tree: 5b6d391c72c9875f0065f0e772e872bc8544834b
parent: 9112829d76cbb8e0c8ef51bbc2d7d1be48cd7b74 (diff)
download: FreeBSD-src-c1c3262b63b1d5fbba6a7ad188f4e47d92c7840e.zip
FreeBSD-src-c1c3262b63b1d5fbba6a7ad188f4e47d92c7840e.tar.gz
127 files changed, 2486 insertions, 876 deletions
diff --git a/Makefile.rules b/Makefile.rules
index dc15c92..12582f6 100644
--- a/Makefile.rules
+++ b/Makefile.rules
@@ -42,7 +42,7 @@ VPATH=$(PROJ_SRC_DIR)
 # Reset the list of suffixes we know how to build.
 #--------------------------------------------------------------------
 .SUFFIXES:
-.SUFFIXES: .c .cpp .cc .h .hpp .o .a .bc .td .ps .dot .ll
+.SUFFIXES: .c .cpp .cc .h .hpp .o .a .bc .td .ps .dot .ll .m .mm
 .SUFFIXES: $(SHLIBEXT) $(SUFFIXES)
 
 #--------------------------------------------------------------------
@@ -632,8 +632,13 @@ ifdef TOOLNAME
   endif
 endif
 endif
+else
+ifneq ($(DARWIN_MAJVERS),4)
+  LD.Flags += $(RPATH) -Wl,@executable_path/../lib
+endif
 endif
 
+
 #----------------------------------------------------------
 # Options To Invoke Tools
 #----------------------------------------------------------
@@ -1442,6 +1447,11 @@ $(ObjDir)/%.o: %.cpp $(ObjDir)/.dir $(BUILT_SOURCES) $(PROJ_SRC_DIR)/Makefile
 	$(Verb) if $(Compile.CXX) $(DEPEND_OPTIONS) $< -o $(ObjDir)/$*.o ; \
 	        $(DEPEND_MOVEFILE)
 
+$(ObjDir)/%.o: %.mm $(ObjDir)/.dir $(BUILT_SOURCES) $(PROJ_SRC_DIR)/Makefile
+	$(Echo) "Compiling $*.mm for $(BuildMode) build" $(PIC_FLAG)
+	$(Verb) if $(Compile.CXX) $(DEPEND_OPTIONS) $< -o $(ObjDir)/$*.o ; \
+	        $(DEPEND_MOVEFILE)
+
 $(ObjDir)/%.o: %.cc $(ObjDir)/.dir $(BUILT_SOURCES) $(PROJ_SRC_DIR)/Makefile
 	$(Echo) "Compiling $*.cc for $(BuildMode) build" $(PIC_FLAG)
 	$(Verb) if $(Compile.CXX) $(DEPEND_OPTIONS) $< -o $(ObjDir)/$*.o ; \
@@ -1452,6 +1462,11 @@ $(ObjDir)/%.o: %.c $(ObjDir)/.dir $(BUILT_SOURCES) $(PROJ_SRC_DIR)/Makefile
 	$(Verb) if $(Compile.C) $(DEPEND_OPTIONS) $< -o $(ObjDir)/$*.o ; \
 	        $(DEPEND_MOVEFILE)
 
+$(ObjDir)/%.o: %.m $(ObjDir)/.dir $(BUILT_SOURCES) $(PROJ_SRC_DIR)/Makefile
+	$(Echo) "Compiling $*.m for $(BuildMode) build" $(PIC_FLAG)
+	$(Verb) if $(Compile.C) $(DEPEND_OPTIONS) $< -o $(ObjDir)/$*.o ; \
+	        $(DEPEND_MOVEFILE)
+
 #---------------------------------------------------------
 # Create .bc files in the ObjDir directory from .cpp .cc and .c files...
 #---------------------------------------------------------
@@ -1470,6 +1485,12 @@ $(ObjDir)/%.ll: %.cpp $(ObjDir)/.dir $(BUILT_SOURCES) $(LLVMCXX)
                               $< -o $(ObjDir)/$*.ll -S -emit-llvm ; \
 	        $(BC_DEPEND_MOVEFILE)
 
+$(ObjDir)/%.ll: %.mm $(ObjDir)/.dir $(BUILT_SOURCES) $(LLVMCXX)
+	$(Echo) "Compiling $*.mm for $(BuildMode) build (bytecode)"
+	$(Verb) if $(BCCompile.CXX) $(BC_DEPEND_OPTIONS) \
+                              $< -o $(ObjDir)/$*.ll -S -emit-llvm ; \
+	        $(BC_DEPEND_MOVEFILE)
+
 $(ObjDir)/%.ll: %.cc $(ObjDir)/.dir $(BUILT_SOURCES) $(LLVMCXX)
 	$(Echo) "Compiling $*.cc for $(BuildMode) build (bytecode)"
 	$(Verb) if $(BCCompile.CXX) $(BC_DEPEND_OPTIONS) \
@@ -1482,6 +1503,12 @@ $(ObjDir)/%.ll: %.c $(ObjDir)/.dir $(BUILT_SOURCES) $(LLVMCC)
                               $< -o $(ObjDir)/$*.ll -S -emit-llvm ; \
 	        $(BC_DEPEND_MOVEFILE)
 
+$(ObjDir)/%.ll: %.m $(ObjDir)/.dir $(BUILT_SOURCES) $(LLVMCC)
+	$(Echo) "Compiling $*.m for $(BuildMode) build (bytecode)"
+	$(Verb) if $(BCCompile.C) $(BC_DEPEND_OPTIONS) \
+                              $< -o $(ObjDir)/$*.ll -S -emit-llvm ; \
+	        $(BC_DEPEND_MOVEFILE)
+
 # Provide alternate rule sets if dependencies are disabled
 else
 
@@ -1489,6 +1516,10 @@ $(ObjDir)/%.o: %.cpp $(ObjDir)/.dir $(BUILT_SOURCES)
 	$(Echo) "Compiling $*.cpp for $(BuildMode) build" $(PIC_FLAG)
 	$(Compile.CXX) $< -o $@
 
+$(ObjDir)/%.o: %.mm $(ObjDir)/.dir $(BUILT_SOURCES)
+	$(Echo) "Compiling $*.mm for $(BuildMode) build" $(PIC_FLAG)
+	$(Compile.CXX) $< -o $@
+
 $(ObjDir)/%.o: %.cc $(ObjDir)/.dir $(BUILT_SOURCES)
 	$(Echo) "Compiling $*.cc for $(BuildMode) build" $(PIC_FLAG)
 	$(Compile.CXX) $< -o $@
@@ -1497,10 +1528,18 @@ $(ObjDir)/%.o: %.c $(ObjDir)/.dir $(BUILT_SOURCES)
 	$(Echo) "Compiling $*.c for $(BuildMode) build" $(PIC_FLAG)
 	$(Compile.C) $< -o $@
 
+$(ObjDir)/%.o: %.m $(ObjDir)/.dir $(BUILT_SOURCES)
+	$(Echo) "Compiling $*.m for $(BuildMode) build" $(PIC_FLAG)
+	$(Compile.C) $< -o $@
+
 $(ObjDir)/%.ll: %.cpp $(ObjDir)/.dir $(BUILT_SOURCES) $(LLVMCXX)
 	$(Echo) "Compiling $*.cpp for $(BuildMode) build (bytecode)"
 	$(BCCompile.CXX) $< -o $@ -S -emit-llvm
 
+$(ObjDir)/%.ll: %.mm $(ObjDir)/.dir $(BUILT_SOURCES) $(LLVMCXX)
+	$(Echo) "Compiling $*.mm for $(BuildMode) build (bytecode)"
+	$(BCCompile.CXX) $< -o $@ -S -emit-llvm
+
 $(ObjDir)/%.ll: %.cc $(ObjDir)/.dir $(BUILT_SOURCES) $(LLVMCXX)
 	$(Echo) "Compiling $*.cc for $(BuildMode) build (bytecode)"
 	$(BCCompile.CXX) $< -o $@ -S -emit-llvm
@@ -1509,6 +1548,10 @@ $(ObjDir)/%.ll: %.c $(ObjDir)/.dir $(BUILT_SOURCES) $(LLVMCC)
 	$(Echo) "Compiling $*.c for $(BuildMode) build (bytecode)"
 	$(BCCompile.C) $< -o $@ -S -emit-llvm
 
+$(ObjDir)/%.ll: %.m $(ObjDir)/.dir $(BUILT_SOURCES) $(LLVMCC)
+	$(Echo) "Compiling $*.m for $(BuildMode) build (bytecode)"
+	$(BCCompile.C) $< -o $@ -S -emit-llvm
+
 endif
 
 
@@ -1517,6 +1560,10 @@ $(BuildMode)/%.ii: %.cpp $(ObjDir)/.dir $(BUILT_SOURCES)
 	$(Echo) "Compiling $*.cpp for $(BuildMode) build to .ii file"
 	$(Verb) $(Preprocess.CXX) $< -o $@
 
+$(BuildMode)/%.ii: %.mm $(ObjDir)/.dir $(BUILT_SOURCES)
+	$(Echo) "Compiling $*.mm for $(BuildMode) build to .ii file"
+	$(Verb) $(Preprocess.CXX) $< -o $@
+
 $(BuildMode)/%.ii: %.cc $(ObjDir)/.dir $(BUILT_SOURCES)
 	$(Echo) "Compiling $*.cc for $(BuildMode) build to .ii file"
 	$(Verb) $(Preprocess.CXX) $< -o $@
@@ -1525,11 +1572,19 @@ $(BuildMode)/%.i: %.c $(ObjDir)/.dir $(BUILT_SOURCES)
 	$(Echo) "Compiling $*.c for $(BuildMode) build to .i file"
 	$(Verb) $(Preprocess.C) $< -o $@
 
+$(BuildMode)/%.i: %.m $(ObjDir)/.dir $(BUILT_SOURCES)
+	$(Echo) "Compiling $*.m for $(BuildMode) build to .i file"
+	$(Verb) $(Preprocess.C) $< -o $@
+
 
 $(ObjDir)/%.s: %.cpp $(ObjDir)/.dir $(BUILT_SOURCES)
 	$(Echo) "Compiling $*.cpp to asm for $(BuildMode) build" $(PIC_FLAG)
 	$(Compile.CXX) $< -o $@ -S
 
+$(ObjDir)/%.s: %.mm $(ObjDir)/.dir $(BUILT_SOURCES)
+	$(Echo) "Compiling $*.mm to asm for $(BuildMode) build" $(PIC_FLAG)
+	$(Compile.CXX) $< -o $@ -S
+
 $(ObjDir)/%.s: %.cc $(ObjDir)/.dir $(BUILT_SOURCES)
 	$(Echo) "Compiling $*.cc to asm for $(BuildMode) build" $(PIC_FLAG)
 	$(Compile.CXX) $< -o $@ -S
@@ -1538,6 +1593,10 @@ $(ObjDir)/%.s: %.c $(ObjDir)/.dir $(BUILT_SOURCES)
 	$(Echo) "Compiling $*.c to asm for $(BuildMode) build" $(PIC_FLAG)
 	$(Compile.C) $< -o $@ -S
 
+$(ObjDir)/%.s: %.m $(ObjDir)/.dir $(BUILT_SOURCES)
+	$(Echo) "Compiling $*.m to asm for $(BuildMode) build" $(PIC_FLAG)
+	$(Compile.C) $< -o $@ -S
+
 
 # make the C and C++ compilers strip debug info out of bytecode libraries.
 ifdef DEBUG_RUNTIME
@@ -1750,7 +1809,7 @@ ifndef DISABLE_AUTO_DEPENDENCIES
 ifndef IS_CLEANING_TARGET
 
 # Get the list of dependency files
-DependSourceFiles := $(basename $(filter %.cpp %.c %.cc, $(Sources)))
+DependSourceFiles := $(basename $(filter %.cpp %.c %.cc %.m %.mm, $(Sources)))
 DependFiles := $(DependSourceFiles:%=$(PROJ_OBJ_DIR)/$(BuildMode)/%.d)
 
 # Include bitcode dependency files if using bitcode libraries
diff --git a/docs/LangRef.html b/docs/LangRef.html
index ca988b7..4b4348d 100644
--- a/docs/LangRef.html
+++ b/docs/LangRef.html
@@ -954,7 +954,9 @@ define [<a href="#linkage">linkage</a>] [<a href="#visibility">visibility</a>]
 
 <h5>Syntax:</h5>
 <pre class="doc_code">
+; An unnamed metadata node, which is referenced by the named metadata.
 !1 = metadata !{metadata !"one"}
+; A named metadata.
 !name = !{null, !1}
 </pre>
 
@@ -7744,7 +7746,7 @@ LLVM</a>.</p>
 
   <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
   <a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br>
-  Last modified: $Date: 2010-07-13 14:26:09 +0200 (Tue, 13 Jul 2010) $
+  Last modified: $Date: 2010-07-13 21:48:13 +0200 (Tue, 13 Jul 2010) $
 </address>
 
 </body>
diff --git a/docs/ProgrammersManual.html b/docs/ProgrammersManual.html
index ed6a2b7..9992cd9 100644
--- a/docs/ProgrammersManual.html
+++ b/docs/ProgrammersManual.html
@@ -457,8 +457,8 @@ StringMap class which is used extensively in LLVM and Clang.</p>
 may have embedded null characters.  Therefore, they cannot simply take
 a <tt>const char *</tt>, and taking a <tt>const std::string&amp;</tt> requires
 clients to perform a heap allocation which is usually unnecessary.  Instead,
-many LLVM APIs use a <tt>const StringRef&amp;</tt> or a <tt>const 
-Twine&amp;</tt> for passing strings efficiently.</p>
+many LLVM APIs use a <tt>StringRef</tt> or a <tt>const Twine&amp;</tt> for
+passing strings efficiently.</p>
 
 </div>
 
@@ -477,19 +477,17 @@ on <tt>std:string</tt>, but does not require heap allocation.</p>
 an <tt>std::string</tt>, or explicitly with a character pointer and length.
 For example, the <tt>StringRef</tt> find function is declared as:</p>
 
-<div class="doc_code">
-  iterator find(const StringRef &amp;Key);
-</div>
+<pre class="doc_code">
+  iterator find(StringRef Key);
+</pre>
 
 <p>and clients can call it using any one of:</p>
 
-<div class="doc_code">
-<pre>
+<pre class="doc_code">
   Map.find("foo");                 <i>// Lookup "foo"</i>
   Map.find(std::string("bar"));    <i>// Lookup "bar"</i>
   Map.find(StringRef("\0baz", 4)); <i>// Lookup "\0baz"</i>
 </pre>
-</div>
 
 <p>Similarly, APIs which need to return a string may return a <tt>StringRef</tt>
 instance, which can be used directly or converted to an <tt>std::string</tt>
@@ -499,7 +497,8 @@ for more information.</p>
 
 <p>You should rarely use the <tt>StringRef</tt> class directly, because it contains
 pointers to external memory it is not generally safe to store an instance of the
-class (unless you know that the external storage will not be freed).</p>
+class (unless you know that the external storage will not be freed). StringRef is
+small and pervasive enough in LLVM that it should always be passed by value.</p>
 
 </div>
 
@@ -3943,7 +3942,7 @@ arguments. An argument has a pointer to the parent Function.</p>
   <a href="mailto:dhurjati@cs.uiuc.edu">Dinakar Dhurjati</a> and
   <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
   <a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br>
-  Last modified: $Date: 2010-05-07 02:28:04 +0200 (Fri, 07 May 2010) $
+  Last modified: $Date: 2010-07-15 00:38:02 +0200 (Thu, 15 Jul 2010) $
 </address>
 
 </body>
diff --git a/docs/SourceLevelDebugging.html b/docs/SourceLevelDebugging.html
index f70a0d3..9d82e3f 100644
--- a/docs/SourceLevelDebugging.html
+++ b/docs/SourceLevelDebugging.html
@@ -1058,7 +1058,7 @@ int main(int argc, char *argv[]) {
   i32 524329,    ;; Tag
   metadata !"MySource.cpp", 
   metadata !"/Users/mine/sources", 
-  metadata !3    ;; Compile unit
+  metadata !2    ;; Compile unit
 }
 
 ;;
@@ -1068,7 +1068,7 @@ int main(int argc, char *argv[]) {
   i32 524329,    ;; Tag
   metadata !"Myheader.h"
   metadata !"/Users/mine/sources", 
-  metadata !3    ;; Compile unit
+  metadata !2    ;; Compile unit
 }
 
 ...
@@ -1780,7 +1780,7 @@ enum Trees {
 
   <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
   <a href="http://llvm.org">LLVM Compiler Infrastructure</a><br>
-  Last modified: $Date: 2010-06-05 00:49:55 +0200 (Sat, 05 Jun 2010) $
+  Last modified: $Date: 2010-07-13 18:53:20 +0200 (Tue, 13 Jul 2010) $
 </address>
 
 </body>
diff --git a/include/llvm/ADT/APFloat.h b/include/llvm/ADT/APFloat.h
index 3cccc81..dfe4e0f 100644
--- a/include/llvm/ADT/APFloat.h
+++ b/include/llvm/ADT/APFloat.h
@@ -179,7 +179,7 @@ namespace llvm {
 
     // Constructors.
     APFloat(const fltSemantics &); // Default construct to 0.0
-    APFloat(const fltSemantics &, const StringRef &);
+    APFloat(const fltSemantics &, StringRef);
     APFloat(const fltSemantics &, integerPart);
     APFloat(const fltSemantics &, fltCategory, bool negative);
     APFloat(const fltSemantics &, uninitializedTag);
@@ -282,7 +282,7 @@ namespace llvm {
                                             bool, roundingMode);
     opStatus convertFromZeroExtendedInteger(const integerPart *, unsigned int,
                                             bool, roundingMode);
-    opStatus convertFromString(const StringRef&, roundingMode);
+    opStatus convertFromString(StringRef, roundingMode);
     APInt bitcastToAPInt() const;
     double convertToDouble() const;
     float convertToFloat() const;
@@ -386,8 +386,8 @@ namespace llvm {
                                           roundingMode, bool *) const;
     opStatus convertFromUnsignedParts(const integerPart *, unsigned int,
                                       roundingMode);
-    opStatus convertFromHexadecimalString(const StringRef&, roundingMode);
-    opStatus convertFromDecimalString (const StringRef&, roundingMode);
+    opStatus convertFromHexadecimalString(StringRef, roundingMode);
+    opStatus convertFromDecimalString(StringRef, roundingMode);
     char *convertNormalToHexString(char *, unsigned int, bool,
                                    roundingMode) const;
     opStatus roundSignificandWithExponent(const integerPart *, unsigned int,
diff --git a/include/llvm/ADT/APInt.h b/include/llvm/ADT/APInt.h
index ec76fbd..59e023b 100644
--- a/include/llvm/ADT/APInt.h
+++ b/include/llvm/ADT/APInt.h
@@ -162,7 +162,7 @@ class APInt {
   ///
   /// @param radix 2, 8, 10, or 16
   /// @brief Convert a char array into an APInt
-  void fromString(unsigned numBits, const StringRef &str, uint8_t radix);
+  void fromString(unsigned numBits, StringRef str, uint8_t radix);
 
   /// This is used by the toString method to divide by the radix. It simply
   /// provides a more convenient form of divide for internal use since KnuthDiv
@@ -248,7 +248,7 @@ public:
   /// @param str the string to be interpreted
   /// @param radix the radix to use for the conversion 
   /// @brief Construct an APInt from a string representation.
-  APInt(unsigned numBits, const StringRef &str, uint8_t radix);
+  APInt(unsigned numBits, StringRef str, uint8_t radix);
 
   /// Simply makes *this a copy of that.
   /// @brief Copy Constructor.
@@ -1153,7 +1153,7 @@ public:
   /// This method determines how many bits are required to hold the APInt
   /// equivalent of the string given by \arg str.
   /// @brief Get bits required for string value.
-  static unsigned getBitsNeeded(const StringRef& str, uint8_t radix);
+  static unsigned getBitsNeeded(StringRef str, uint8_t radix);
 
   /// countLeadingZeros - This function is an APInt version of the
   /// countLeadingZeros_{32,64} functions in MathExtras.h. It counts the number
diff --git a/include/llvm/CodeGen/FastISel.h b/include/llvm/CodeGen/FastISel.h
index 7f3a7c7..79b1554 100644
--- a/include/llvm/CodeGen/FastISel.h
+++ b/include/llvm/CodeGen/FastISel.h
@@ -106,12 +106,17 @@ public:
   /// into the current block.
   void recomputeInsertPt();
 
+  struct SavePoint {
+    MachineBasicBlock::iterator InsertPt;
+    DebugLoc DL;
+  };
+
   /// enterLocalValueArea - Prepare InsertPt to begin inserting instructions
   /// into the local value area and return the old insert position.
-  MachineBasicBlock::iterator enterLocalValueArea();
+  SavePoint enterLocalValueArea();
 
-  /// leaveLocalValueArea - Reset InsertPt to the given old insert position
-  void leaveLocalValueArea(MachineBasicBlock::iterator OldInsertPt);
+  /// leaveLocalValueArea - Reset InsertPt to the given old insert position.
+  void leaveLocalValueArea(SavePoint Old);
 
   virtual ~FastISel();
 
@@ -302,8 +307,6 @@ protected:
   }
 
 private:
-  bool SelectLoad(const User *I);
-
   bool SelectBinaryOp(const User *I, unsigned ISDOpcode);
 
   bool SelectFNeg(const User *I);
diff --git a/include/llvm/CodeGen/LiveIntervalAnalysis.h b/include/llvm/CodeGen/LiveIntervalAnalysis.h
index 5a0d81b..c136048 100644
--- a/include/llvm/CodeGen/LiveIntervalAnalysis.h
+++ b/include/llvm/CodeGen/LiveIntervalAnalysis.h
@@ -272,10 +272,6 @@ namespace llvm {
     unsigned getNumConflictsWithPhysReg(const LiveInterval &li,
                                         unsigned PhysReg) const;
 
-    /// processImplicitDefs - Process IMPLICIT_DEF instructions. Add isUndef
-    /// marker to implicit_def defs and their uses.
-    void processImplicitDefs();
-
     /// intervalIsInOneMBB - Returns true if the specified interval is entirely
     /// within a single basic block.
     bool intervalIsInOneMBB(const LiveInterval &li) const;
diff --git a/include/llvm/CodeGen/MachineModuleInfo.h b/include/llvm/CodeGen/MachineModuleInfo.h
index 84aef10..50e38b4 100644
--- a/include/llvm/CodeGen/MachineModuleInfo.h
+++ b/include/llvm/CodeGen/MachineModuleInfo.h
@@ -344,7 +344,7 @@ public:
     VariableDbgInfo.push_back(std::make_pair(N, std::make_pair(Slot, Loc)));
   }
 
-  VariableDbgInfoMapTy &getVariableDbgInfo() {  return VariableDbgInfo;  }
+  VariableDbgInfoMapTy &getVariableDbgInfo();
 
 }; // End class MachineModuleInfo
 
diff --git a/include/llvm/CodeGen/ProcessImplicitDefs.h b/include/llvm/CodeGen/ProcessImplicitDefs.h
index cec867f..30477b9 100644
--- a/include/llvm/CodeGen/ProcessImplicitDefs.h
+++ b/include/llvm/CodeGen/ProcessImplicitDefs.h
@@ -12,6 +12,7 @@
 #define LLVM_CODEGEN_PROCESSIMPLICITDEFS_H
 
 #include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/ADT/SmallSet.h"
 
 namespace llvm {
 
@@ -24,7 +25,8 @@ namespace llvm {
   private:
 
     bool CanTurnIntoImplicitDef(MachineInstr *MI, unsigned Reg,
-                                unsigned OpIdx, const TargetInstrInfo *tii_);
+                                unsigned OpIdx, const TargetInstrInfo *tii_,
+                                SmallSet<unsigned, 8> &ImpDefRegs);
 
   public:
     static char ID;
diff --git a/include/llvm/MC/MCParser/AsmParser.h b/include/llvm/MC/MCParser/AsmParser.h
index 82b120b..0e8570a 100644
--- a/include/llvm/MC/MCParser/AsmParser.h
+++ b/include/llvm/MC/MCParser/AsmParser.h
@@ -107,7 +107,7 @@ private:
   
   void EatToEndOfStatement();
   
-  bool ParseAssignment(const StringRef &Name);
+  bool ParseAssignment(StringRef Name);
 
   bool ParsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc);
   bool ParseBinOpRHS(unsigned Precedence, const MCExpr *&Res, SMLoc &EndLoc);
diff --git a/include/llvm/Support/COFF.h b/include/llvm/Support/COFF.h
index 2d4e054..69137bf 100644
--- a/include/llvm/Support/COFF.h
+++ b/include/llvm/Support/COFF.h
@@ -10,12 +10,12 @@
 // This file contains an definitions used in Windows COFF Files.
 //
 // Structures and enums defined within this file where created using
-// information from Microsofts publicly available PE/COFF format document:
+// information from Microsoft's publicly available PE/COFF format document:
 // 
 // Microsoft Portable Executable and Common Object File Format Specification
 // Revision 8.1 - February 15, 2008
 //
-// As of 5/2/2010, hosted by microsoft at:
+// As of 5/2/2010, hosted by Microsoft at:
 // http://www.microsoft.com/whdc/system/platform/firmware/pecoff.mspx
 //
 //===----------------------------------------------------------------------===//
@@ -57,7 +57,7 @@ namespace COFF {
     uint8_t  NumberOfAuxSymbols;
   };
 
-  enum symbol_flags {
+  enum SymbolFlags {
     SF_TypeMask = 0x0000FFFF,
     SF_TypeShift = 0,
 
@@ -67,36 +67,70 @@ namespace COFF {
     SF_WeakReference = 0x01000000
   };
 
-  enum symbol_storage_class {
-    IMAGE_SYM_CLASS_END_OF_FUNCTION  = -1,
-    IMAGE_SYM_CLASS_NULL             = 0,
-    IMAGE_SYM_CLASS_AUTOMATIC        = 1,
-    IMAGE_SYM_CLASS_EXTERNAL         = 2,
-    IMAGE_SYM_CLASS_STATIC           = 3,
-    IMAGE_SYM_CLASS_REGISTER         = 4,
-    IMAGE_SYM_CLASS_EXTERNAL_DEF     = 5,
-    IMAGE_SYM_CLASS_LABEL            = 6,
-    IMAGE_SYM_CLASS_UNDEFINED_LABEL  = 7,
-    IMAGE_SYM_CLASS_MEMBER_OF_STRUCT = 8,
-    IMAGE_SYM_CLASS_ARGUMENT         = 9,
-    IMAGE_SYM_CLASS_STRUCT_TAG       = 10,
-    IMAGE_SYM_CLASS_MEMBER_OF_UNION  = 11,
-    IMAGE_SYM_CLASS_UNION_TAG        = 12,
-    IMAGE_SYM_CLASS_TYPE_DEFINITION  = 13,
-    IMAGE_SYM_CLASS_UNDEFINED_STATIC = 14,
-    IMAGE_SYM_CLASS_ENUM_TAG         = 15,
-    IMAGE_SYM_CLASS_MEMBER_OF_ENUM   = 16,
-    IMAGE_SYM_CLASS_REGISTER_PARAM   = 17,
-    IMAGE_SYM_CLASS_BIT_FIELD        = 18,
+  /// Storage class tells where and what the symbol represents
+  enum SymbolStorageClass {
+    IMAGE_SYM_CLASS_END_OF_FUNCTION  = -1,  ///< Physical end of function
+    IMAGE_SYM_CLASS_NULL             = 0,   ///< No symbol
+    IMAGE_SYM_CLASS_AUTOMATIC        = 1,   ///< Stack variable
+    IMAGE_SYM_CLASS_EXTERNAL         = 2,   ///< External symbol
+    IMAGE_SYM_CLASS_STATIC           = 3,   ///< Static
+    IMAGE_SYM_CLASS_REGISTER         = 4,   ///< Register variable
+    IMAGE_SYM_CLASS_EXTERNAL_DEF     = 5,   ///< External definition
+    IMAGE_SYM_CLASS_LABEL            = 6,   ///< Label
+    IMAGE_SYM_CLASS_UNDEFINED_LABEL  = 7,   ///< Undefined label
+    IMAGE_SYM_CLASS_MEMBER_OF_STRUCT = 8,   ///< Member of structure
+    IMAGE_SYM_CLASS_ARGUMENT         = 9,   ///< Function argument
+    IMAGE_SYM_CLASS_STRUCT_TAG       = 10,  ///< Structure tag
+    IMAGE_SYM_CLASS_MEMBER_OF_UNION  = 11,  ///< Member of union
+    IMAGE_SYM_CLASS_UNION_TAG        = 12,  ///< Union tag
+    IMAGE_SYM_CLASS_TYPE_DEFINITION  = 13,  ///< Type definition
+    IMAGE_SYM_CLASS_UNDEFINED_STATIC = 14,  ///< Undefined static
+    IMAGE_SYM_CLASS_ENUM_TAG         = 15,  ///< Enumeration tag
+    IMAGE_SYM_CLASS_MEMBER_OF_ENUM   = 16,  ///< Member of enumeration
+    IMAGE_SYM_CLASS_REGISTER_PARAM   = 17,  ///< Register parameter
+    IMAGE_SYM_CLASS_BIT_FIELD        = 18,  ///< Bit field
+    /// ".bb" or ".eb" - beginning or end of block
     IMAGE_SYM_CLASS_BLOCK            = 100,
+    /// ".bf" or ".ef" - beginning or end of function
     IMAGE_SYM_CLASS_FUNCTION         = 101,
-    IMAGE_SYM_CLASS_END_OF_STRUCT    = 102,
-    IMAGE_SYM_CLASS_FILE             = 103,
+    IMAGE_SYM_CLASS_END_OF_STRUCT    = 102, ///< End of structure
+    IMAGE_SYM_CLASS_FILE             = 103, ///< File name
+    /// Section definition (Microsoft tools use STATIC instead)
     IMAGE_SYM_CLASS_SECTION          = 104,
-    IMAGE_SYM_CLASS_WEAK_EXTERNAL    = 105,
+    IMAGE_SYM_CLASS_WEAK_EXTERNAL    = 105, ///< Weak external
+    /// CLR token symbol (name is the token's hexadecimal value in ASCII)
     IMAGE_SYM_CLASS_CLR_TOKEN        = 107
   };
 
+  enum SymbolBaseType {
+    IMAGE_SYM_TYPE_NULL   = 0,  ///< No type information or unknown base type.
+    IMAGE_SYM_TYPE_VOID   = 1,  ///< Used with void pointers and functions.
+    IMAGE_SYM_TYPE_CHAR   = 2,  ///< A character (signed byte).
+    IMAGE_SYM_TYPE_SHORT  = 3,  ///< A 2-byte signed integer.
+    IMAGE_SYM_TYPE_INT    = 4,  ///< A natural integer type on the target.
+    IMAGE_SYM_TYPE_LONG   = 5,  ///< A 4-byte signed integer.
+    IMAGE_SYM_TYPE_FLOAT  = 6,  ///< A 4-byte floating-point number.
+    IMAGE_SYM_TYPE_DOUBLE = 7,  ///< An 8-byte floating-point number.
+    IMAGE_SYM_TYPE_STRUCT = 8,  ///< A structure.
+    IMAGE_SYM_TYPE_UNION  = 9,  ///< A union.
+    IMAGE_SYM_TYPE_ENUM   = 10, ///< An enumerated type.
+    IMAGE_SYM_TYPE_MOE    = 11, ///< A member of enumeration (a specific value).
+    IMAGE_SYM_TYPE_BYTE   = 12, ///< A byte; unsigned 1-byte integer.
+    IMAGE_SYM_TYPE_WORD   = 13, ///< A word; unsigned 2-byte integer.
+    IMAGE_SYM_TYPE_UINT   = 14, ///< An unsigned integer of natural size.
+    IMAGE_SYM_TYPE_DWORD  = 15  ///< An unsigned 4-byte integer.
+  };
+
+  enum SymbolComplexType {
+    IMAGE_SYM_DTYPE_NULL     = 0, ///< No complex type; simple scalar variable. 
+    IMAGE_SYM_DTYPE_POINTER  = 1, ///< A pointer to base type.
+    IMAGE_SYM_DTYPE_FUNCTION = 2, ///< A function that returns a base type.
+    IMAGE_SYM_DTYPE_ARRAY    = 3, ///< An array of base type.
+    
+    /// Type is formed as (base + (derived << SCT_COMPLEX_TYPE_SHIFT))
+    SCT_COMPLEX_TYPE_SHIFT   = 4
+  };
+
   struct section {
     char     Name[NameSize];
     uint32_t VirtualSize;
@@ -110,7 +144,7 @@ namespace COFF {
     uint32_t Characteristics;
   };
 
-  enum section_characteristics {
+  enum SectionCharacteristics {
     IMAGE_SCN_TYPE_NO_PAD            = 0x00000008,
     IMAGE_SCN_CNT_CODE               = 0x00000020,
     IMAGE_SCN_CNT_INITIALIZED_DATA   = 0x00000040,
@@ -154,7 +188,7 @@ namespace COFF {
     uint16_t Type;
   };
 
-  enum relocation_type_x86 {
+  enum RelocationTypeX86 {
     IMAGE_REL_I386_ABSOLUTE = 0x0000,
     IMAGE_REL_I386_DIR16    = 0x0001,
     IMAGE_REL_I386_REL16    = 0x0002,
diff --git a/include/llvm/Support/Regex.h b/include/llvm/Support/Regex.h
index 591af00..ea65ccf 100644
--- a/include/llvm/Support/Regex.h
+++ b/include/llvm/Support/Regex.h
@@ -36,7 +36,7 @@ namespace llvm {
     /// Compiles the given POSIX Extended Regular Expression \arg Regex.
     /// This implementation supports regexes and matching strings with embedded
     /// NUL characters.
-    Regex(const StringRef &Regex, unsigned Flags = NoFlags);
+    Regex(StringRef Regex, unsigned Flags = NoFlags);
     ~Regex();
 
     /// isValid - returns the error encountered during regex compilation, or
@@ -55,7 +55,7 @@ namespace llvm {
     /// the first group is always the entire pattern.
     ///
     /// This returns true on a successful match.
-    bool match(const StringRef &String, SmallVectorImpl<StringRef> *Matches=0);
+    bool match(StringRef String, SmallVectorImpl<StringRef> *Matches = 0);
 
     /// sub - Return the result of replacing the first match of the regex in
     /// \arg String with the \arg Repl string. Backreferences like "\0" in the
diff --git a/include/llvm/Support/StringPool.h b/include/llvm/Support/StringPool.h
index 82e46d4..de05e0b 100644
--- a/include/llvm/Support/StringPool.h
+++ b/include/llvm/Support/StringPool.h
@@ -64,7 +64,7 @@ namespace llvm {
     /// intern - Adds a string to the pool and returns a reference-counted
     /// pointer to it. No additional memory is allocated if the string already
     /// exists in the pool.
-    PooledStringPtr intern(const StringRef &Str);
+    PooledStringPtr intern(StringRef Str);
 
     /// empty - Checks whether the pool is empty. Returns true if so.
     ///
diff --git a/include/llvm/Target/TargetAsmParser.h b/include/llvm/Target/TargetAsmParser.h
index dc2b236..f431c38 100644
--- a/include/llvm/Target/TargetAsmParser.h
+++ b/include/llvm/Target/TargetAsmParser.h
@@ -49,7 +49,7 @@ public:
   /// \param Operands [out] - The list of parsed operands, this returns
   ///        ownership of them to the caller.
   /// \return True on failure.
-  virtual bool ParseInstruction(const StringRef &Name, SMLoc NameLoc,
+  virtual bool ParseInstruction(StringRef Name, SMLoc NameLoc,
                             SmallVectorImpl<MCParsedAsmOperand*> &Operands) = 0;
 
   /// ParseDirective - Parse a target specific assembler directive
diff --git a/include/llvm/Target/TargetInstrInfo.h b/include/llvm/Target/TargetInstrInfo.h
index 6e69914..e42be26 100644
--- a/include/llvm/Target/TargetInstrInfo.h
+++ b/include/llvm/Target/TargetInstrInfo.h
@@ -371,7 +371,7 @@ public:
                                    unsigned SrcReg, bool isKill, int FrameIndex,
                                    const TargetRegisterClass *RC,
                                    const TargetRegisterInfo *TRI) const {
-    assert(0 && "Target didn't implement TargetInstrInfo::storeRegToStackSlot!");
+  assert(0 && "Target didn't implement TargetInstrInfo::storeRegToStackSlot!");
   }
 
   /// loadRegFromStackSlot - Load the specified register of the given register
@@ -383,7 +383,7 @@ public:
                                     unsigned DestReg, int FrameIndex,
                                     const TargetRegisterClass *RC,
                                     const TargetRegisterInfo *TRI) const {
-    assert(0 && "Target didn't implement TargetInstrInfo::loadRegFromStackSlot!");
+  assert(0 && "Target didn't implement TargetInstrInfo::loadRegFromStackSlot!");
   }
   
   /// spillCalleeSavedRegisters - Issues instruction(s) to spill all callee
@@ -392,7 +392,7 @@ public:
   /// storeRegToStackSlot(). Returns false otherwise.
   virtual bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator MI,
-                                         const std::vector<CalleeSavedInfo> &CSI,
+                                        const std::vector<CalleeSavedInfo> &CSI,
                                          const TargetRegisterInfo *TRI) const {
     return false;
   }
@@ -457,7 +457,7 @@ protected:
   /// take care of adding a MachineMemOperand to the newly created instruction.
   virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
                                               MachineInstr* MI,
-                                              const SmallVectorImpl<unsigned> &Ops,
+                                          const SmallVectorImpl<unsigned> &Ops,
                                               MachineInstr* LoadMI) const {
     return 0;
   }
@@ -501,7 +501,7 @@ public:
   /// only differences between the two addresses are the offset. It also returns
   /// the offsets by reference.
   virtual bool areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
-                                       int64_t &Offset1, int64_t &Offset2) const {
+                                    int64_t &Offset1, int64_t &Offset2) const {
     return false;
   }
 
diff --git a/include/llvm/Target/TargetOptions.h b/include/llvm/Target/TargetOptions.h
index a316c70..b369880 100644
--- a/include/llvm/Target/TargetOptions.h
+++ b/include/llvm/Target/TargetOptions.h
@@ -68,7 +68,7 @@ namespace llvm {
   /// this flag is off (the default), the code generator is not allowed to
   /// produce results that are "less precise" than IEEE allows.  This includes
   /// use of X86 instructions like FSIN and FCOS instead of libcalls.
-  /// UnsafeFPMath implies FiniteOnlyFPMath and LessPreciseFPMAD.
+  /// UnsafeFPMath implies LessPreciseFPMAD.
   extern bool UnsafeFPMath;
 
   /// FiniteOnlyFPMath - This returns true when the -enable-finite-only-fp-math
diff --git a/include/llvm/Value.h b/include/llvm/Value.h
index 16b6207..cfb4422 100644
--- a/include/llvm/Value.h
+++ b/include/llvm/Value.h
@@ -266,6 +266,10 @@ public:
     SubclassOptionalData &= V->SubclassOptionalData;
   }
 
+  /// hasValueHandle - Return true if there is a value handle associated with
+  /// this value.
+  bool hasValueHandle() const { return HasValueHandle; }
+  
   // Methods for support type inquiry through isa, cast, and dyn_cast:
   static inline bool classof(const Value *) {
     return true; // Values are always values.
diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp
index dbefc2d..24cd343 100644
--- a/lib/Analysis/InstructionSimplify.cpp
+++ b/lib/Analysis/InstructionSimplify.cpp
@@ -440,27 +440,47 @@ void llvm::ReplaceAndSimplifyAllUses(Instruction *From, Value *To,
                                      const TargetData *TD) {
   assert(From != To && "ReplaceAndSimplifyAllUses(X,X) is not valid!");
   
-  // FromHandle - This keeps a weakvh on the from value so that we can know if
-  // it gets deleted out from under us in a recursive simplification.
+  // FromHandle/ToHandle - These keep a WeakVH on each of the from/to values
+  // so that we can tell if either gets deleted out from under us or replaced
+  // in a recursive simplification.
   WeakVH FromHandle(From);
+  WeakVH ToHandle(To);
   
   while (!From->use_empty()) {
     // Update the instruction to use the new value.
-    Use &U = From->use_begin().getUse();
-    Instruction *User = cast<Instruction>(U.getUser());
-    U = To;
+    Use &TheUse = From->use_begin().getUse();
+    Instruction *User = cast<Instruction>(TheUse.getUser());
+    TheUse = To;
+
+    // Check to see if the instruction can be folded due to the operand
+    // replacement.  For example changing (or X, Y) into (or X, -1) can replace
+    // the 'or' with -1.
+    Value *SimplifiedVal;
+    {
+      // Sanity check to make sure 'User' doesn't dangle across
+      // SimplifyInstruction.
+      AssertingVH<> UserHandle(User);
     
-    // See if we can simplify it.
-    if (Value *V = SimplifyInstruction(User, TD)) {
-      // Recursively simplify this.
-      ReplaceAndSimplifyAllUses(User, V, TD);
-      
-      // If the recursive simplification ended up revisiting and deleting 'From'
-      // then we're done.
-      if (FromHandle == 0)
-        return;
+      SimplifiedVal = SimplifyInstruction(User, TD);
+      if (SimplifiedVal == 0) continue;
     }
+    
+    // Recursively simplify this user to the new value.
+    ReplaceAndSimplifyAllUses(User, SimplifiedVal, TD);
+    From = dyn_cast_or_null<Instruction>((Value*)FromHandle);
+    To = ToHandle;
+      
+    assert(ToHandle && "To value deleted by recursive simplification?");
+      
+    // If the recursive simplification ended up revisiting and deleting
+    // 'From' then we're done.
+    if (From == 0)
+      return;
   }
+  
+  // If 'From' has value handles referring to it, do a real RAUW to update them.
+  From->replaceAllUsesWith(To);
+  
   From->eraseFromParent();
 }
 
diff --git a/lib/Analysis/ProfileInfo.cpp b/lib/Analysis/ProfileInfo.cpp
index 38dcd25..8d2712f 100644
--- a/lib/Analysis/ProfileInfo.cpp
+++ b/lib/Analysis/ProfileInfo.cpp
@@ -71,22 +71,24 @@ ProfileInfoT<Function,BasicBlock>::getExecutionCount(const BasicBlock *BB) {
 
   // Are there zero predecessors of this block?
   if (PI == PE) {
-    Edge e = getEdge(0,BB);
+    Edge e = getEdge(0, BB);
     Count = getEdgeWeight(e);
   } else {
     // Otherwise, if there are predecessors, the execution count of this block is
     // the sum of the edge frequencies from the incoming edges.
     std::set<const BasicBlock*> ProcessedPreds;
     Count = 0;
-    for (; PI != PE; ++PI)
-      if (ProcessedPreds.insert(*PI).second) {
-        double w = getEdgeWeight(getEdge(*PI, BB));
+    for (; PI != PE; ++PI) {
+      const BasicBlock *P = *PI;
+      if (ProcessedPreds.insert(P).second) {
+        double w = getEdgeWeight(getEdge(P, BB));
         if (w == MissingValue) {
           Count = MissingValue;
           break;
         }
         Count += w;
       }
+    }
   }
 
   // If the predecessors did not suffice to get block weight, try successors.
diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp
index 6752181..221b994 100644
--- a/lib/AsmParser/LLParser.cpp
+++ b/lib/AsmParser/LLParser.cpp
@@ -544,20 +544,21 @@ bool LLParser::ParseNamedMetadata() {
     return true;
 
   SmallVector<MDNode *, 8> Elts;
-  do {
-    // Null is a special case since it is typeless.
-    if (EatIfPresent(lltok::kw_null)) {
-      Elts.push_back(0);
-      continue;
-    }
+  if (Lex.getKind() != lltok::rbrace)
+    do {
+      // Null is a special case since it is typeless.
+      if (EatIfPresent(lltok::kw_null)) {
+        Elts.push_back(0);
+        continue;
+      }
 
-    if (ParseToken(lltok::exclaim, "Expected '!' here"))
-      return true;
+      if (ParseToken(lltok::exclaim, "Expected '!' here"))
+        return true;
     
-    MDNode *N = 0;
-    if (ParseMDNodeID(N)) return true;
-    Elts.push_back(N);
-  } while (EatIfPresent(lltok::comma));
+      MDNode *N = 0;
+      if (ParseMDNodeID(N)) return true;
+      Elts.push_back(N);
+    } while (EatIfPresent(lltok::comma));
 
   if (ParseToken(lltok::rbrace, "expected end of metadata node"))
     return true;
@@ -2021,33 +2022,8 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
     ID.StrVal = Lex.getStrVal();
     ID.Kind = ValID::t_LocalName;
     break;
-  case lltok::exclaim:   // !{...} MDNode, !"foo" MDString
-    Lex.Lex();
-    
-    if (EatIfPresent(lltok::lbrace)) {
-      SmallVector<Value*, 16> Elts;
-      if (ParseMDNodeVector(Elts, PFS) ||
-          ParseToken(lltok::rbrace, "expected end of metadata node"))
-        return true;
-
-      ID.MDNodeVal = MDNode::get(Context, Elts.data(), Elts.size());
-      ID.Kind = ValID::t_MDNode;
-      return false;
-    }
-
-    // Standalone metadata reference
-    // !{ ..., !42, ... }
-    if (Lex.getKind() == lltok::APSInt) {
-      if (ParseMDNodeID(ID.MDNodeVal)) return true;
-      ID.Kind = ValID::t_MDNode;
-      return false;
-    }
-    
-    // MDString:
-    //   ::= '!' STRINGCONSTANT
-    if (ParseMDString(ID.MDStringVal)) return true;
-    ID.Kind = ValID::t_MDString;
-    return false;
+  case lltok::exclaim:   // !42, !{...}, or !"foo"
+    return ParseMetadataValue(ID, PFS);
   case lltok::APSInt:
     ID.APSIntVal = Lex.getAPSIntVal();
     ID.Kind = ValID::t_APSInt;
@@ -2528,6 +2504,42 @@ bool LLParser::ParseGlobalValueVector(SmallVectorImpl<Constant*> &Elts) {
   return false;
 }
 
+/// ParseMetadataValue - Parse a '!'-prefixed value into ID; true on failure.
+///  ::= !42
+///  ::= !{...}
+///  ::= !"string"
+bool LLParser::ParseMetadataValue(ValID &ID, PerFunctionState *PFS) {
+  assert(Lex.getKind() == lltok::exclaim);
+  Lex.Lex();  // Eat the '!' that the assert above checked for.
+
+  // MDNode:
+  // !{ ... }
+  if (EatIfPresent(lltok::lbrace)) {
+    SmallVector<Value*, 16> Elts;
+    if (ParseMDNodeVector(Elts, PFS) ||
+        ParseToken(lltok::rbrace, "expected end of metadata node"))
+      return true;
+
+    ID.MDNodeVal = MDNode::get(Context, Elts.data(), Elts.size());
+    ID.Kind = ValID::t_MDNode;
+    return false;
+  }
+
+  // Standalone metadata reference by number:
+  // !42
+  if (Lex.getKind() == lltok::APSInt) {
+    if (ParseMDNodeID(ID.MDNodeVal)) return true;
+    ID.Kind = ValID::t_MDNode;
+    return false;
+  }
+
+  // MDString (everything not matched above is parsed as one):
+  //   ::= '!' STRINGCONSTANT
+  if (ParseMDString(ID.MDStringVal)) return true;
+  ID.Kind = ValID::t_MDString;
+  return false;
+}
+
 
 //===----------------------------------------------------------------------===//
 // Function Parsing.
@@ -3983,6 +3995,10 @@ int LLParser::ParseInsertValue(Instruction *&Inst, PerFunctionState &PFS) {
 ///   ::= 'null' | TypeAndValue
 bool LLParser::ParseMDNodeVector(SmallVectorImpl<Value*> &Elts,
                                  PerFunctionState *PFS) {
+  // Check for an empty list.
+  if (Lex.getKind() == lltok::rbrace)
+    return false;
+
   do {
     // Null is a special case since it is typeless.
     if (EatIfPresent(lltok::kw_null)) {
diff --git a/lib/AsmParser/LLParser.h b/lib/AsmParser/LLParser.h
index c8f669f..f765a2a 100644
--- a/lib/AsmParser/LLParser.h
+++ b/lib/AsmParser/LLParser.h
@@ -308,6 +308,7 @@ namespace llvm {
     bool ParseGlobalValue(const Type *Ty, Constant *&V);
     bool ParseGlobalTypeAndValue(Constant *&V);
     bool ParseGlobalValueVector(SmallVectorImpl<Constant*> &Elts);
+    bool ParseMetadataValue(ValID &ID, PerFunctionState *PFS);
     bool ParseMDNodeVector(SmallVectorImpl<Value*> &, PerFunctionState *PFS);
 
     // Function Parsing.
diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp
index 527ae49..b3f0776 100644
--- a/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -820,7 +820,7 @@ bool BitcodeReader::ParseMetadata() {
       IsFunctionLocal = true;
       // fall-through
     case bitc::METADATA_NODE: {
-      if (Record.empty() || Record.size() % 2 == 1)
+      if (Record.size() % 2 == 1)
         return Error("Invalid METADATA_NODE record");
 
       unsigned Size = Record.size();
@@ -834,7 +834,8 @@ bool BitcodeReader::ParseMetadata() {
         else
           Elts.push_back(NULL);
       }
-      Value *V = MDNode::getWhenValsUnresolved(Context, &Elts[0], Elts.size(),
+      Value *V = MDNode::getWhenValsUnresolved(Context,
+                                               Elts.data(), Elts.size(),
                                                IsFunctionLocal);
       IsFunctionLocal = false;
       MDValueList.AssignValue(V, NextMDValueNo++);
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index d9387a8..db1b37a 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -178,7 +178,7 @@ bool AsmPrinter::doInitialization(Module &M) {
   if (!M.getModuleInlineAsm().empty()) {
     OutStreamer.AddComment("Start of file scope inline assembly");
     OutStreamer.AddBlankLine();
-    EmitInlineAsm(M.getModuleInlineAsm(), 0/*no loc cookie*/);
+    EmitInlineAsm(M.getModuleInlineAsm()+"\n", 0/*no loc cookie*/);
     OutStreamer.AddComment("End of file scope inline assembly");
     OutStreamer.AddBlankLine();
   }
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
index f6f3bae..202d9b6 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
@@ -53,17 +53,6 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, unsigned LocCookie) const {
   }
   
   SourceMgr SrcMgr;
-
-  // Ensure the buffer is newline terminated.
-  char *TmpString = 0;
-  if (Str.back() != '\n') {
-    TmpString = new char[Str.size() + 2];
-    memcpy(TmpString, Str.data(), Str.size());
-    TmpString[Str.size()] = '\n';
-    TmpString[Str.size() + 1] = 0;
-    isNullTerminated = true;
-    Str = TmpString;
-  }
   
   // If the current LLVMContext has an inline asm handler, set it in SourceMgr.
   LLVMContext &LLVMCtx = MMI->getModule()->getContext();
@@ -95,9 +84,6 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, unsigned LocCookie) const {
                        /*NoFinalize*/ true);
   if (Res && !HasDiagHandler)
     report_fatal_error("Error parsing inline asm\n");
-
-  if (TmpString)
-    delete[] TmpString;
 }
 
 
diff --git a/lib/CodeGen/LiveInterval.cpp b/lib/CodeGen/LiveInterval.cpp
index 21a9b7d..ad57284 100644
--- a/lib/CodeGen/LiveInterval.cpp
+++ b/lib/CodeGen/LiveInterval.cpp
@@ -119,6 +119,7 @@ bool LiveInterval::killedInRange(SlotIndex Start, SlotIndex End) const {
 //
 bool LiveInterval::overlapsFrom(const LiveInterval& other,
                                 const_iterator StartPos) const {
+  assert(!empty() && "empty interval");
   const_iterator i = begin();
   const_iterator ie = end();
   const_iterator j = StartPos;
@@ -161,16 +162,8 @@ bool LiveInterval::overlapsFrom(const LiveInterval& other,
 /// by [Start, End).
 bool LiveInterval::overlaps(SlotIndex Start, SlotIndex End) const {
   assert(Start < End && "Invalid range");
-  const_iterator I  = begin();
-  const_iterator E  = end();
-  const_iterator si = std::upper_bound(I, E, Start);
-  const_iterator ei = std::upper_bound(I, E, End);
-  if (si != ei)
-    return true;
-  if (si == I)
-    return false;
-  --si;
-  return si->contains(Start);
+  const_iterator I = std::lower_bound(begin(), end(), End);
+  return I != begin() && (--I)->end > Start;
 }
 
 /// extendIntervalEndTo - This method is used when we want to extend the range
@@ -868,6 +861,10 @@ void LiveInterval::print(raw_ostream &OS, const TargetRegisterInfo *TRI) const {
           OS << "?";
         else
           OS << vni->def;
+        if (vni->hasPHIKill())
+          OS << "-phikill";
+        if (vni->hasRedefByEC())
+          OS << "-ec";
       }
     }
   }
diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp
index 956d21c..4c054f5 100644
--- a/lib/CodeGen/MachineLICM.cpp
+++ b/lib/CodeGen/MachineLICM.cpp
@@ -497,11 +497,6 @@ void MachineLICM::HoistRegion(MachineDomTreeNode *N) {
 /// candidate for LICM. e.g. If the instruction is a call, then it's obviously
 /// not safe to hoist it.
 bool MachineLICM::IsLICMCandidate(MachineInstr &I) {
-  // It is not profitable to hoist implicitdefs.  FIXME: Why not?  what if they
-  // are an argument to some other otherwise-hoistable instruction?
-  if (I.isImplicitDef())
-    return false;
-  
   // Check if it's safe to move the instruction.
   bool DontMoveAcrossStore = true;
   if (!I.isSafeToMove(TII, AA, DontMoveAcrossStore))
@@ -717,7 +712,9 @@ MachineLICM::LookForDuplicate(const MachineInstr *MI,
 
 bool MachineLICM::EliminateCSE(MachineInstr *MI,
           DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator &CI) {
-  if (CI == CSEMap.end())
+  // Do not CSE implicit_def so ProcessImplicitDefs can properly propagate
+  // the undef property onto uses.
+  if (CI == CSEMap.end() || MI->isImplicitDef())
     return false;
 
   if (const MachineInstr *Dup = LookForDuplicate(MI, CI->second)) {
diff --git a/lib/CodeGen/MachineModuleInfo.cpp b/lib/CodeGen/MachineModuleInfo.cpp
index 25284d6..15778b4 100644
--- a/lib/CodeGen/MachineModuleInfo.cpp
+++ b/lib/CodeGen/MachineModuleInfo.cpp
@@ -563,3 +563,26 @@ unsigned MachineModuleInfo::getPersonalityIndex() const {
   return 0;
 }
 
+namespace {
+  /// VariableDebugSorter - Orders VariableDbgInfo entries by the line, then
+  /// the column, of entry.second.second, to avoid depending on the arbitrary
+  /// order that instruction selection visits variables in.
+  struct VariableDebugSorter {
+    bool operator()(const MachineModuleInfo::VariableDbgInfoMapTy::value_type &A,
+                    const MachineModuleInfo::VariableDbgInfoMapTy::value_type &B)
+                  const {
+       if (A.second.second.getLine() != B.second.second.getLine())
+         return A.second.second.getLine() < B.second.second.getLine();
+       if (A.second.second.getCol() != B.second.second.getCol())
+         return A.second.second.getCol() < B.second.second.getCol();
+       return false;  // Same line and column: neither orders first.
+    }
+  };
+} // end anonymous namespace
+
+MachineModuleInfo::VariableDbgInfoMapTy &
+MachineModuleInfo::getVariableDbgInfo() {  // NOTE: re-sorts on every call.
+  std::stable_sort(VariableDbgInfo.begin(), VariableDbgInfo.end(),
+                   VariableDebugSorter());  // Ties keep their prior order.
+  return VariableDbgInfo;  // The member itself, now sorted in place.
+}
diff --git a/lib/CodeGen/ProcessImplicitDefs.cpp b/lib/CodeGen/ProcessImplicitDefs.cpp
index ca4c477..2e31908 100644
--- a/lib/CodeGen/ProcessImplicitDefs.cpp
+++ b/lib/CodeGen/ProcessImplicitDefs.cpp
@@ -41,21 +41,51 @@ void ProcessImplicitDefs::getAnalysisUsage(AnalysisUsage &AU) const {
   MachineFunctionPass::getAnalysisUsage(AU);
 }
 
-bool ProcessImplicitDefs::CanTurnIntoImplicitDef(MachineInstr *MI,
-                                                 unsigned Reg, unsigned OpIdx,
-                                                 const TargetInstrInfo *tii_) {
+bool
+ProcessImplicitDefs::CanTurnIntoImplicitDef(MachineInstr *MI,
+                                            unsigned Reg, unsigned OpIdx,
+                                            const TargetInstrInfo *tii_,
+                                            SmallSet<unsigned, 8> &ImpDefRegs) {
   unsigned SrcReg, DstReg, SrcSubReg, DstSubReg;
   if (tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubReg, DstSubReg) &&
-      Reg == SrcReg && DstSubReg == 0)
+      Reg == SrcReg &&
+      (DstSubReg == 0 || ImpDefRegs.count(DstReg)))
     return true;
 
   switch(OpIdx) {
-    case 1: return MI->isCopy() && MI->getOperand(0).getSubReg() == 0;
-    case 2: return MI->isSubregToReg() && MI->getOperand(0).getSubReg() == 0;
-    default: return false;
+  case 1:
+    return MI->isCopy() && (MI->getOperand(0).getSubReg() == 0 ||
+                            ImpDefRegs.count(MI->getOperand(0).getReg()));
+  case 2:
+    return MI->isSubregToReg() && (MI->getOperand(0).getSubReg() == 0 ||
+                                  ImpDefRegs.count(MI->getOperand(0).getReg()));
+  default: return false;
   }
 }
 
+static bool isUndefCopy(MachineInstr *MI, unsigned Reg,
+                        const TargetInstrInfo *tii_,
+                        SmallSet<unsigned, 8> &ImpDefRegs) {
+  if (MI->isCopy()) {  // COPY form: inspect operands 0 and 1 directly.
+    MachineOperand &MO0 = MI->getOperand(0);
+    MachineOperand &MO1 = MI->getOperand(1);
+    if (MO1.getReg() != Reg)  // Operand 1 is not Reg.
+      return false;
+    if (!MO0.getSubReg() || ImpDefRegs.count(MO0.getReg()))
+      return true;  // Operand 0 has no subreg, or its reg is in ImpDefRegs.
+    return false;
+  }
+
+  unsigned SrcReg, DstReg, SrcSubReg, DstSubReg;
+  if (tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubReg, DstSubReg)) {
+    if (Reg != SrcReg)  // The move's SrcReg is not Reg.
+      return false;
+    if (DstSubReg == 0 || ImpDefRegs.count(DstReg))
+      return true;  // Same test as the COPY case, on DstSubReg/DstReg.
+  }
+  return false;  // Neither form matched.
+}
+
 /// processImplicitDefs - Process IMPLICIT_DEF instructions and make sure
 /// there is one implicit_def for each use. Add isUndef marker to
 /// implicit_def defs and their uses.
@@ -104,7 +134,7 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) {
       // Eliminate %reg1032:sub<def> = COPY undef.
       if (MI->isCopy() && MI->getOperand(0).getSubReg()) {
         MachineOperand &MO = MI->getOperand(1);
-        if (ImpDefRegs.count(MO.getReg())) {
+        if (MO.isUndef() || ImpDefRegs.count(MO.getReg())) {
           if (MO.isKill()) {
             LiveVariables::VarInfo& vi = lv_->getVarInfo(MO.getReg());
             vi.removeKill(MI);
@@ -126,7 +156,7 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) {
         if (!ImpDefRegs.count(Reg))
           continue;
         // Use is a copy, just turn it into an implicit_def.
-        if (CanTurnIntoImplicitDef(MI, Reg, i, tii_)) {
+        if (CanTurnIntoImplicitDef(MI, Reg, i, tii_, ImpDefRegs)) {
           bool isKill = MO.isKill();
           MI->setDesc(tii_->get(TargetOpcode::IMPLICIT_DEF));
           for (int j = MI->getNumOperands() - 1, ee = 0; j > ee; --j)
@@ -223,11 +253,7 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) {
         MachineInstr *RMI = RUses[i];
 
         // Turn a copy use into an implicit_def.
-        unsigned SrcReg, DstReg, SrcSubReg, DstSubReg;
-        if ((RMI->isCopy() && RMI->getOperand(1).getReg() == Reg &&
-             RMI->getOperand(0).getSubReg() == 0) ||
-            (tii_->isMoveInstr(*RMI, SrcReg, DstReg, SrcSubReg, DstSubReg) &&
-             Reg == SrcReg && DstSubReg == 0)) {
+        if (isUndefCopy(RMI, Reg, tii_, ImpDefRegs)) {
           RMI->setDesc(tii_->get(TargetOpcode::IMPLICIT_DEF));
 
           bool isKill = false;
diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp
index 3f7e4a5..decaa76 100644
--- a/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -135,7 +135,7 @@ unsigned FastISel::getRegForValue(const Value *V) {
        !FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(V))))
     return FuncInfo.InitializeRegForValue(V);
 
-  MachineBasicBlock::iterator SaveInsertPt = enterLocalValueArea();
+  SavePoint SaveInsertPt = enterLocalValueArea();
 
   // Materialize the value in a register. Emit any instructions in the
   // local value area.
@@ -286,18 +286,22 @@ void FastISel::recomputeInsertPt() {
     ++FuncInfo.InsertPt;
 }
 
-MachineBasicBlock::iterator FastISel::enterLocalValueArea() {
+FastISel::SavePoint FastISel::enterLocalValueArea() {
   MachineBasicBlock::iterator OldInsertPt = FuncInfo.InsertPt;
+  DebugLoc OldDL = DL;
   recomputeInsertPt();
-  return OldInsertPt;
+  DL = DebugLoc();
+  SavePoint SP = { OldInsertPt, OldDL };
+  return SP;
 }
 
-void FastISel::leaveLocalValueArea(MachineBasicBlock::iterator OldInsertPt) {
+void FastISel::leaveLocalValueArea(SavePoint OldInsertPt) {
   if (FuncInfo.InsertPt != FuncInfo.MBB->begin())
     LastLocalValue = llvm::prior(FuncInfo.InsertPt);
 
   // Restore the previous insert position.
-  FuncInfo.InsertPt = OldInsertPt;
+  FuncInfo.InsertPt = OldInsertPt.InsertPt;
+  DL = OldInsertPt.DL;
 }
 
 /// SelectBinaryOp - Select and emit code for a binary operator instruction,
@@ -779,39 +783,8 @@ FastISel::SelectFNeg(const User *I) {
 }
 
 bool
-FastISel::SelectLoad(const User *I) {
-  LoadInst *LI = const_cast<LoadInst *>(cast<LoadInst>(I));
-
-  // For a load from an alloca, make a limited effort to find the value
-  // already available in a register, avoiding redundant loads.
-  if (!LI->isVolatile() && isa<AllocaInst>(LI->getPointerOperand())) {
-    BasicBlock::iterator ScanFrom = LI;
-    if (const Value *V = FindAvailableLoadedValue(LI->getPointerOperand(),
-                                                  LI->getParent(), ScanFrom)) {
-      if (!V->use_empty() &&
-          (!isa<Instruction>(V) ||
-           cast<Instruction>(V)->getParent() == LI->getParent() ||
-           (isa<AllocaInst>(V) &&
-            FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(V)))) &&
-          (!isa<Argument>(V) ||
-           LI->getParent() == &LI->getParent()->getParent()->getEntryBlock())) {
-      unsigned ResultReg = getRegForValue(V);
-      if (ResultReg != 0) {
-        UpdateValueMap(I, ResultReg);
-        return true;
-      }
-      }
-    }
-  }
-
-  return false;
-}
-
-bool
 FastISel::SelectOperator(const User *I, unsigned Opcode) {
   switch (Opcode) {
-  case Instruction::Load:
-    return SelectLoad(I);
   case Instruction::Add:
     return SelectBinaryOp(I, ISD::ADD);
   case Instruction::FAdd:
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index d323c16..458e865 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -820,7 +820,7 @@ SDValue SelectionDAGBuilder::getValue(const Value *V) {
     unsigned InReg = It->second;
     RegsForValue RFV(*DAG.getContext(), TLI, InReg, V->getType());
     SDValue Chain = DAG.getEntryNode();
-    return N = RFV.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), Chain, NULL);
+    return N = RFV.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), Chain,NULL);
   }
 
   // Otherwise create a new SDValue and remember it.
@@ -3955,7 +3955,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
     if (AA->alias(I.getArgOperand(0), Size, I.getArgOperand(1), Size) ==
         AliasAnalysis::NoAlias) {
       DAG.setRoot(DAG.getMemcpy(getRoot(), dl, Op1, Op2, Op3, Align, isVol, 
-                                false, I.getArgOperand(0), 0, I.getArgOperand(1), 0));
+                                false, I.getArgOperand(0), 0,
+                                I.getArgOperand(1), 0));
       return 0;
     }
 
@@ -5522,10 +5523,12 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
         break;
       }
 
-      if (OpInfo.ConstraintType == TargetLowering::C_Other) {
-        assert(!OpInfo.isIndirect &&
-               "Don't know how to handle indirect other inputs yet!");
+      // Treat indirect 'X' constraint as memory.
+      if (OpInfo.ConstraintType == TargetLowering::C_Other && 
+          OpInfo.isIndirect) 
+        OpInfo.ConstraintType = TargetLowering::C_Memory;
 
+      if (OpInfo.ConstraintType == TargetLowering::C_Other) {
         std::vector<SDValue> Ops;
         TLI.LowerAsmOperandForConstraint(InOperandVal, OpInfo.ConstraintCode[0],
                                          Ops, DAG);
diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp
index 793f3c7..e0949bd 100644
--- a/lib/MC/MCParser/AsmParser.cpp
+++ b/lib/MC/MCParser/AsmParser.cpp
@@ -712,7 +712,7 @@ bool AsmParser::ParseStatement() {
   return HadError;
 }
 
-bool AsmParser::ParseAssignment(const StringRef &Name) {
+bool AsmParser::ParseAssignment(StringRef Name) {
   // FIXME: Use better location, we should use proper tokens.
   SMLoc EqualLoc = Lexer.getLoc();
 
diff --git a/lib/Support/APFloat.cpp b/lib/Support/APFloat.cpp
index 485bf4d..2e78557 100644
--- a/lib/Support/APFloat.cpp
+++ b/lib/Support/APFloat.cpp
@@ -761,7 +761,7 @@ APFloat::APFloat(const fltSemantics &ourSemantics,
     makeNaN();
 }
 
-APFloat::APFloat(const fltSemantics &ourSemantics, const StringRef& text)
+APFloat::APFloat(const fltSemantics &ourSemantics, StringRef text)
 {
   assertArithmeticOK(ourSemantics);
   initialize(&ourSemantics);
@@ -2185,8 +2185,7 @@ APFloat::convertFromZeroExtendedInteger(const integerPart *parts,
 }
 
 APFloat::opStatus
-APFloat::convertFromHexadecimalString(const StringRef &s,
-                                      roundingMode rounding_mode)
+APFloat::convertFromHexadecimalString(StringRef s, roundingMode rounding_mode)
 {
   lostFraction lost_fraction = lfExactlyZero;
   integerPart *significand;
@@ -2361,7 +2360,7 @@ APFloat::roundSignificandWithExponent(const integerPart *decSigParts,
 }
 
 APFloat::opStatus
-APFloat::convertFromDecimalString(const StringRef &str, roundingMode rounding_mode)
+APFloat::convertFromDecimalString(StringRef str, roundingMode rounding_mode)
 {
   decimalInfo D;
   opStatus fs;
@@ -2471,7 +2470,7 @@ APFloat::convertFromDecimalString(const StringRef &str, roundingMode rounding_mo
 }
 
 APFloat::opStatus
-APFloat::convertFromString(const StringRef &str, roundingMode rounding_mode)
+APFloat::convertFromString(StringRef str, roundingMode rounding_mode)
 {
   assertArithmeticOK(*semantics);
   assert(!str.empty() && "Invalid string length");
diff --git a/lib/Support/APInt.cpp b/lib/Support/APInt.cpp
index 1341d21..262fa42 100644
--- a/lib/Support/APInt.cpp
+++ b/lib/Support/APInt.cpp
@@ -102,7 +102,7 @@ APInt::APInt(unsigned numBits, unsigned numWords, const uint64_t bigVal[])
   clearUnusedBits();
 }
 
-APInt::APInt(unsigned numbits, const StringRef& Str, uint8_t radix)
+APInt::APInt(unsigned numbits, StringRef Str, uint8_t radix)
   : BitWidth(numbits), VAL(0) {
   assert(BitWidth && "Bitwidth too small");
   fromString(numbits, Str, radix);
@@ -613,7 +613,7 @@ APInt& APInt::flip(unsigned bitPosition) {
   return *this;
 }
 
-unsigned APInt::getBitsNeeded(const StringRef& str, uint8_t radix) {
+unsigned APInt::getBitsNeeded(StringRef str, uint8_t radix) {
   assert(!str.empty() && "Invalid string length");
   assert((radix == 10 || radix == 8 || radix == 16 || radix == 2) &&
          "Radix should be 2, 8, 10, or 16!");
@@ -2046,7 +2046,7 @@ void APInt::udivrem(const APInt &LHS, const APInt &RHS,
   divide(LHS, lhsWords, RHS, rhsWords, &Quotient, &Remainder);
 }
 
-void APInt::fromString(unsigned numbits, const StringRef& str, uint8_t radix) {
+void APInt::fromString(unsigned numbits, StringRef str, uint8_t radix) {
   // Check our assumptions here
   assert(!str.empty() && "Invalid string length");
   assert((radix == 10 || radix == 8 || radix == 16 || radix == 2) &&
diff --git a/lib/Support/Regex.cpp b/lib/Support/Regex.cpp
index a7631de..309ffb0 100644
--- a/lib/Support/Regex.cpp
+++ b/lib/Support/Regex.cpp
@@ -19,7 +19,7 @@
 #include <string>
 using namespace llvm;
 
-Regex::Regex(const StringRef &regex, unsigned Flags) {
+Regex::Regex(StringRef regex, unsigned Flags) {
   unsigned flags = 0;
   preg = new llvm_regex();
   preg->re_endp = regex.end();
@@ -52,7 +52,7 @@ unsigned Regex::getNumMatches() const {
   return preg->re_nsub;
 }
 
-bool Regex::match(const StringRef &String, SmallVectorImpl<StringRef> *Matches){
+bool Regex::match(StringRef String, SmallVectorImpl<StringRef> *Matches){
   unsigned nmatch = Matches ? preg->re_nsub+1 : 0;
 
   // pmatch needs to have at least one element.
diff --git a/lib/Support/StringPool.cpp b/lib/Support/StringPool.cpp
index 1ee917f..ff607cf 100644
--- a/lib/Support/StringPool.cpp
+++ b/lib/Support/StringPool.cpp
@@ -22,7 +22,7 @@ StringPool::~StringPool() {
   assert(InternTable.empty() && "PooledStringPtr leaked!");
 }
 
-PooledStringPtr StringPool::intern(const StringRef &Key) {
+PooledStringPtr StringPool::intern(StringRef Key) {
   table_t::iterator I = InternTable.find(Key);
   if (I != InternTable.end())
     return PooledStringPtr(&*I);
diff --git a/lib/System/Unix/Program.inc b/lib/System/Unix/Program.inc
index 67018de..0209f5a 100644
--- a/lib/System/Unix/Program.inc
+++ b/lib/System/Unix/Program.inc
@@ -310,12 +310,9 @@ Program::Wait(unsigned secondsToWait,
   // fact of having a handler at all causes the wait below to return with EINTR,
   // unlike if we used SIG_IGN.
   if (secondsToWait) {
-#if !defined(__HAIKU__) && !defined(__minix)
-    Act.sa_sigaction = 0;
-#endif
+    memset(&Act, 0, sizeof(Act));
     Act.sa_handler = TimeOutHandler;
     sigemptyset(&Act.sa_mask);
-    Act.sa_flags = 0;
     sigaction(SIGALRM, &Act, &Old);
     alarm(secondsToWait);
   }
diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td
index f1e6a9f..fa64d6c 100644
--- a/lib/Target/ARM/ARM.td
+++ b/lib/Target/ARM/ARM.td
@@ -48,6 +48,8 @@ def FeatureHWDiv  : SubtargetFeature<"hwdiv", "HasHardwareDivide", "true",
                                      "Enable divide instructions">;
 def FeatureT2ExtractPack: SubtargetFeature<"t2xtpk", "HasT2ExtractPack", "true",
                                  "Enable Thumb2 extract and pack instructions">;
+def FeatureSlowFPBrcc : SubtargetFeature<"slow-fp-brcc", "SlowFPBrcc", "true",
+                                         "FP compare + branch is slow">;
 
 // Some processors have multiply-accumulate instructions that don't
 // play nicely with other VFP instructions, and it's generally better
@@ -129,7 +131,7 @@ def : Processor<"arm1156t2f-s",    ARMV6Itineraries,
 // V7 Processors.
 def : Processor<"cortex-a8",        CortexA8Itineraries,
                 [ArchV7A, FeatureThumb2, FeatureNEON, FeatureHasSlowVMLx,
-                 FeatureNEONForFP, FeatureT2ExtractPack]>;
+                 FeatureSlowFPBrcc, FeatureNEONForFP, FeatureT2ExtractPack]>;
 def : Processor<"cortex-a9",        CortexA9Itineraries,
                 [ArchV7A, FeatureThumb2, FeatureNEON, FeatureT2ExtractPack]>;
 def : ProcNoItin<"cortex-m3",       [ArchV7M, FeatureThumb2, FeatureHWDiv]>;
diff --git a/lib/Target/ARM/ARMAddressingModes.h b/lib/Target/ARM/ARMAddressingModes.h
index d316b13..92a13f1 100644
--- a/lib/Target/ARM/ARMAddressingModes.h
+++ b/lib/Target/ARM/ARMAddressingModes.h
@@ -519,9 +519,8 @@ namespace ARM_AM {
   //
   // This is stored in two operands [regaddr, align].  The first is the
   // address register.  The second operand is the value of the alignment
-  // specifier to use or zero if no explicit alignment.
-  // Valid alignments are: 0, 8, 16, and 32 bytes, depending on the specific
-  // instruction.
+  // specifier in bytes or zero if no explicit alignment.
+  // Valid alignments depend on the specific instruction.
 
   //===--------------------------------------------------------------------===//
   // NEON Modified Immediates
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 98d8b85..0091df7 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -565,6 +565,7 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
   case ARMISD::CMPZ:          return "ARMISD::CMPZ";
   case ARMISD::CMPFP:         return "ARMISD::CMPFP";
   case ARMISD::CMPFPw0:       return "ARMISD::CMPFPw0";
+  case ARMISD::BCC_i64:       return "ARMISD::BCC_i64";
   case ARMISD::FMSTAT:        return "ARMISD::FMSTAT";
   case ARMISD::CMOV:          return "ARMISD::CMOV";
   case ARMISD::CNEG:          return "ARMISD::CNEG";
@@ -623,6 +624,8 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
   case ARMISD::VQRSHRNsu:     return "ARMISD::VQRSHRNsu";
   case ARMISD::VGETLANEu:     return "ARMISD::VGETLANEu";
   case ARMISD::VGETLANEs:     return "ARMISD::VGETLANEs";
+  case ARMISD::VMOVIMM:       return "ARMISD::VMOVIMM";
+  case ARMISD::VMVNIMM:       return "ARMISD::VMVNIMM";
   case ARMISD::VDUP:          return "ARMISD::VDUP";
   case ARMISD::VDUPLANE:      return "ARMISD::VDUPLANE";
   case ARMISD::VEXT:          return "ARMISD::VEXT";
@@ -2216,7 +2219,7 @@ static bool isFloatingPointZero(SDValue Op) {
 /// the given operands.
 SDValue
 ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
-                             SDValue &ARMCC, SelectionDAG &DAG,
+                             SDValue &ARMcc, SelectionDAG &DAG,
                              DebugLoc dl) const {
   if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
     unsigned C = RHSC->getZExtValue();
@@ -2268,48 +2271,14 @@ ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
     CompareType = ARMISD::CMPZ;
     break;
   }
-  ARMCC = DAG.getConstant(CondCode, MVT::i32);
+  ARMcc = DAG.getConstant(CondCode, MVT::i32);
   return DAG.getNode(CompareType, dl, MVT::Flag, LHS, RHS);
 }
 
-static bool canBitcastToInt(SDNode *Op) {
-  return Op->hasOneUse() && 
-    ISD::isNormalLoad(Op) &&
-    Op->getValueType(0) == MVT::f32;
-}
-
-static SDValue bitcastToInt(SDValue Op, SelectionDAG &DAG) {
-  if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op))
-    return DAG.getLoad(MVT::i32, Op.getDebugLoc(),
-                       Ld->getChain(), Ld->getBasePtr(),
-                       Ld->getSrcValue(), Ld->getSrcValueOffset(),
-                       Ld->isVolatile(), Ld->isNonTemporal(),
-                       Ld->getAlignment());
-
-  llvm_unreachable("Unknown VFP cmp argument!");
-}
-
 /// Returns a appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands.
 SDValue
-ARMTargetLowering::getVFPCmp(SDValue &LHS, SDValue &RHS, ISD::CondCode CC,
-                             SDValue &ARMCC, SelectionDAG &DAG,
+ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG,
                              DebugLoc dl) const {
-  if (UnsafeFPMath && FiniteOnlyFPMath() &&
-      (CC == ISD::SETEQ || CC == ISD::SETOEQ ||
-       CC == ISD::SETNE || CC == ISD::SETUNE) &&
-      canBitcastToInt(LHS.getNode()) && canBitcastToInt(RHS.getNode())) {
-    // If unsafe fp math optimization is enabled and there are no othter uses of
-    // the CMP operands, and the condition code is EQ oe NE, we can optimize it
-    // to an integer comparison.
-    if (CC == ISD::SETOEQ)
-      CC = ISD::SETEQ;
-    else if (CC == ISD::SETUNE)
-      CC = ISD::SETNE;
-    LHS = bitcastToInt(LHS, DAG);
-    RHS = bitcastToInt(RHS, DAG);
-    return getARMCmp(LHS, RHS, CC, ARMCC, DAG, dl);
-  }
-
   SDValue Cmp;
   if (!isFloatingPointZero(RHS))
     Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Flag, LHS, RHS);
@@ -2328,59 +2297,184 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
   DebugLoc dl = Op.getDebugLoc();
 
   if (LHS.getValueType() == MVT::i32) {
-    SDValue ARMCC;
+    SDValue ARMcc;
     SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
-    SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMCC, DAG, dl);
-    return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMCC, CCR,Cmp);
+    SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
+    return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR,Cmp);
   }
 
   ARMCC::CondCodes CondCode, CondCode2;
   FPCCToARMCC(CC, CondCode, CondCode2);
 
-  SDValue ARMCC = DAG.getConstant(CondCode, MVT::i32);
+  SDValue ARMcc = DAG.getConstant(CondCode, MVT::i32);
+  SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
   SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
-  SDValue Cmp = getVFPCmp(LHS, RHS, CC, ARMCC, DAG, dl);
   SDValue Result = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal,
-                               ARMCC, CCR, Cmp);
+                               ARMcc, CCR, Cmp);
   if (CondCode2 != ARMCC::AL) {
-    SDValue ARMCC2 = DAG.getConstant(CondCode2, MVT::i32);
+    SDValue ARMcc2 = DAG.getConstant(CondCode2, MVT::i32);
     // FIXME: Needs another CMP because flag can have but one use.
-    SDValue Cmp2 = getVFPCmp(LHS, RHS, CC, ARMCC2, DAG, dl);
+    SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl);
     Result = DAG.getNode(ARMISD::CMOV, dl, VT,
-                         Result, TrueVal, ARMCC2, CCR, Cmp2);
+                         Result, TrueVal, ARMcc2, CCR, Cmp2);
   }
   return Result;
 }
 
+/// canChangeToInt - Return true if the fp compare operand Op can be morphed
+/// into an integer compare; sets SeenZero when isFloatingPointZero(Op) holds.
+static bool canChangeToInt(SDValue Op, bool &SeenZero,
+                           const ARMSubtarget *Subtarget) {
+  SDNode *N = Op.getNode();
+  if (!N->hasOneUse())
+    // Other uses would require moving the value from fp to integer registers.
+    return false;
+  if (!N->getNumValues())
+    return false;
+  EVT VT = Op.getValueType();
+  if (VT != MVT::f32 && !Subtarget->isFPBrccSlow())
+    // f32 case is generally profitable. f64 case only makes sense when vcmpe +
+    // vmrs are very slow, e.g. cortex-a8.
+    return false;
+
+  if (isFloatingPointZero(Op)) {
+    SeenZero = true;
+    return true;
+  }
+  return ISD::isNormalLoad(N);
+}
+
+static SDValue bitcastf32Toi32(SDValue Op, SelectionDAG &DAG) {
+  if (isFloatingPointZero(Op))
+    return DAG.getConstant(0, MVT::i32);
+  // Otherwise expect a load and redo it as an i32 load from the same address.
+  if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op))
+    return DAG.getLoad(MVT::i32, Op.getDebugLoc(),
+                       Ld->getChain(), Ld->getBasePtr(),
+                       Ld->getSrcValue(), Ld->getSrcValueOffset(),
+                       Ld->isVolatile(), Ld->isNonTemporal(),
+                       Ld->getAlignment());
+
+  llvm_unreachable("Unknown VFP cmp argument!");
+}
+
+static void expandf64Toi32(SDValue Op, SelectionDAG &DAG,
+                           SDValue &RetVal1, SDValue &RetVal2) {
+  if (isFloatingPointZero(Op)) {
+    RetVal1 = DAG.getConstant(0, MVT::i32);
+    RetVal2 = DAG.getConstant(0, MVT::i32);
+    return;
+  }
+  // Otherwise expect a load and split it into two i32 loads, at +0 and +4.
+  if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op)) {
+    SDValue Ptr = Ld->getBasePtr();
+    RetVal1 = DAG.getLoad(MVT::i32, Op.getDebugLoc(),
+                          Ld->getChain(), Ptr,
+                          Ld->getSrcValue(), Ld->getSrcValueOffset(),
+                          Ld->isVolatile(), Ld->isNonTemporal(),
+                          Ld->getAlignment());
+    // The second i32 comes from Ptr + 4; its alignment is recomputed for that.
+    EVT PtrType = Ptr.getValueType();
+    unsigned NewAlign = MinAlign(Ld->getAlignment(), 4);
+    SDValue NewPtr = DAG.getNode(ISD::ADD, Op.getDebugLoc(),
+                                 PtrType, Ptr, DAG.getConstant(4, PtrType));
+    RetVal2 = DAG.getLoad(MVT::i32, Op.getDebugLoc(),
+                          Ld->getChain(), NewPtr,
+                          Ld->getSrcValue(), Ld->getSrcValueOffset() + 4,
+                          Ld->isVolatile(), Ld->isNonTemporal(),
+                          NewAlign);
+    return;
+  }
+
+  llvm_unreachable("Unknown VFP cmp argument!");
+}
+
+/// OptimizeVFPBrcond - With -enable-unsafe-fp-math, it's legal to optimize some
+/// f32 and even f64 comparisons to integer ones.
+SDValue
+ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const {
+  SDValue Chain = Op.getOperand(0);
+  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
+  SDValue LHS = Op.getOperand(2);
+  SDValue RHS = Op.getOperand(3);
+  SDValue Dest = Op.getOperand(4);
+  DebugLoc dl = Op.getDebugLoc();
+
+  bool SeenZero = false;
+  if (canChangeToInt(LHS, SeenZero, Subtarget) &&
+      canChangeToInt(RHS, SeenZero, Subtarget) &&
+      // If one of the operands is zero, it's safe to ignore the NaN case.
+      (FiniteOnlyFPMath() || SeenZero)) {
+    // If unsafe fp math optimization is enabled and there are no other uses of
+    // the CMP operands, and the condition code is EQ or NE, we can optimize it
+    // to an integer comparison.
+    if (CC == ISD::SETOEQ)
+      CC = ISD::SETEQ;
+    else if (CC == ISD::SETUNE)
+      CC = ISD::SETNE;
+
+    SDValue ARMcc;
+    if (LHS.getValueType() == MVT::f32) {
+      LHS = bitcastf32Toi32(LHS, DAG);
+      RHS = bitcastf32Toi32(RHS, DAG);
+      SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
+      SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
+      return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
+                         Chain, Dest, ARMcc, CCR, Cmp);
+    }
+
+    SDValue LHS1, LHS2;
+    SDValue RHS1, RHS2;
+    expandf64Toi32(LHS, DAG, LHS1, LHS2);
+    expandf64Toi32(RHS, DAG, RHS1, RHS2);
+    ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
+    ARMcc = DAG.getConstant(CondCode, MVT::i32);
+    SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Flag);
+    SDValue Ops[] = { Chain, ARMcc, LHS1, LHS2, RHS1, RHS2, Dest };
+    return DAG.getNode(ARMISD::BCC_i64, dl, VTList, Ops, 7);
+  }
+
+  return SDValue();
+}
+
 SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
-  SDValue  Chain = Op.getOperand(0);
+  SDValue Chain = Op.getOperand(0);
   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
-  SDValue    LHS = Op.getOperand(2);
-  SDValue    RHS = Op.getOperand(3);
-  SDValue   Dest = Op.getOperand(4);
+  SDValue LHS = Op.getOperand(2);
+  SDValue RHS = Op.getOperand(3);
+  SDValue Dest = Op.getOperand(4);
   DebugLoc dl = Op.getDebugLoc();
 
   if (LHS.getValueType() == MVT::i32) {
-    SDValue ARMCC;
+    SDValue ARMcc;
+    SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
     SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
-    SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMCC, DAG, dl);
     return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
-                       Chain, Dest, ARMCC, CCR,Cmp);
+                       Chain, Dest, ARMcc, CCR, Cmp);
   }
 
   assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64);
+
+  if (UnsafeFPMath &&
+      (CC == ISD::SETEQ || CC == ISD::SETOEQ ||
+       CC == ISD::SETNE || CC == ISD::SETUNE)) {
+    SDValue Result = OptimizeVFPBrcond(Op, DAG);
+    if (Result.getNode())
+      return Result;
+  }
+
   ARMCC::CondCodes CondCode, CondCode2;
   FPCCToARMCC(CC, CondCode, CondCode2);
 
-  SDValue ARMCC = DAG.getConstant(CondCode, MVT::i32);
-  SDValue Cmp = getVFPCmp(LHS, RHS, CC, ARMCC, DAG, dl);
+  SDValue ARMcc = DAG.getConstant(CondCode, MVT::i32);
+  SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
   SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
   SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Flag);
-  SDValue Ops[] = { Chain, Dest, ARMCC, CCR, Cmp };
+  SDValue Ops[] = { Chain, Dest, ARMcc, CCR, Cmp };
   SDValue Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops, 5);
   if (CondCode2 != ARMCC::AL) {
-    ARMCC = DAG.getConstant(CondCode2, MVT::i32);
-    SDValue Ops[] = { Res, Dest, ARMCC, CCR, Res.getValue(1) };
+    ARMcc = DAG.getConstant(CondCode2, MVT::i32);
+    SDValue Ops[] = { Res, Dest, ARMcc, CCR, Res.getValue(1) };
     Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops, 5);
   }
   return Res;
@@ -2469,12 +2563,11 @@ SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
   EVT VT = Op.getValueType();
   EVT SrcVT = Tmp1.getValueType();
   SDValue AbsVal = DAG.getNode(ISD::FABS, dl, VT, Tmp0);
-  SDValue ARMCC = DAG.getConstant(ARMCC::LT, MVT::i32);
+  SDValue ARMcc = DAG.getConstant(ARMCC::LT, MVT::i32);
   SDValue FP0 = DAG.getConstantFP(0.0, SrcVT);
-  SDValue Cmp = getVFPCmp(Tmp1, FP0,
-                          ISD::SETLT, ARMCC, DAG, dl);
+  SDValue Cmp = getVFPCmp(Tmp1, FP0, DAG, dl);
   SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
-  return DAG.getNode(ARMISD::CNEG, dl, VT, AbsVal, AbsVal, ARMCC, CCR, Cmp);
+  return DAG.getNode(ARMISD::CNEG, dl, VT, AbsVal, AbsVal, ARMcc, CCR, Cmp);
 }
 
 SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{
@@ -2553,51 +2646,18 @@ static SDValue ExpandBIT_CONVERT(SDNode *N, SelectionDAG &DAG) {
 }
 
 /// getZeroVector - Returns a vector of specified type with all zero elements.
-///
+/// Zero vectors are used to represent vector negation and in those cases
+/// will be implemented with the NEON VNEG instruction.  However, VNEG does
+/// not support i64 elements, so sometimes the zero vectors will need to be
+/// explicitly constructed.  Regardless, use a canonical VMOV to create the
+/// zero vector.
 static SDValue getZeroVector(EVT VT, SelectionDAG &DAG, DebugLoc dl) {
   assert(VT.isVector() && "Expected a vector type");
-
-  // Zero vectors are used to represent vector negation and in those cases
-  // will be implemented with the NEON VNEG instruction.  However, VNEG does
-  // not support i64 elements, so sometimes the zero vectors will need to be
-  // explicitly constructed.  For those cases, and potentially other uses in
-  // the future, always build zero vectors as <16 x i8> or <8 x i8> bitcasted
-  // to their dest type.  This ensures they get CSE'd.
-  SDValue Vec;
-  SDValue Cst = DAG.getTargetConstant(0, MVT::i8);
-  SmallVector<SDValue, 8> Ops;
-  MVT TVT;
-
-  if (VT.getSizeInBits() == 64) {
-    Ops.assign(8, Cst); TVT = MVT::v8i8;
-  } else {
-    Ops.assign(16, Cst); TVT = MVT::v16i8;
-  }
-  Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, TVT, &Ops[0], Ops.size());
-
-  return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Vec);
-}
-
-/// getOnesVector - Returns a vector of specified type with all bits set.
-///
-static SDValue getOnesVector(EVT VT, SelectionDAG &DAG, DebugLoc dl) {
-  assert(VT.isVector() && "Expected a vector type");
-
-  // Always build ones vectors as <16 x i8> or <8 x i8> bitcasted to their
-  // dest type. This ensures they get CSE'd.
-  SDValue Vec;
-  SDValue Cst = DAG.getTargetConstant(0xFF, MVT::i8);
-  SmallVector<SDValue, 8> Ops;
-  MVT TVT;
-
-  if (VT.getSizeInBits() == 64) {
-    Ops.assign(8, Cst); TVT = MVT::v8i8;
-  } else {
-    Ops.assign(16, Cst); TVT = MVT::v16i8;
-  }
-  Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, TVT, &Ops[0], Ops.size());
-
-  return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Vec);
+  // The canonical modified immediate encoding of a zero vector is....0!
+  SDValue EncodedVal = DAG.getTargetConstant(0, MVT::i32);
+  EVT VmovVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32;
+  SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, EncodedVal);
+  return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Vmov);
 }
 
 /// LowerShiftRightParts - Lower SRA_PARTS, which returns two
@@ -2611,7 +2671,7 @@ SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op,
   SDValue ShOpLo = Op.getOperand(0);
   SDValue ShOpHi = Op.getOperand(1);
   SDValue ShAmt  = Op.getOperand(2);
-  SDValue ARMCC;
+  SDValue ARMcc;
   unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL;
 
   assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS);
@@ -2627,9 +2687,9 @@ SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op,
 
   SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
   SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, MVT::i32), ISD::SETGE,
-                          ARMCC, DAG, dl);
+                          ARMcc, DAG, dl);
   SDValue Hi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
-  SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMCC,
+  SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc,
                            CCR, Cmp);
 
   SDValue Ops[2] = { Lo, Hi };
@@ -2647,7 +2707,7 @@ SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op,
   SDValue ShOpLo = Op.getOperand(0);
   SDValue ShOpHi = Op.getOperand(1);
   SDValue ShAmt  = Op.getOperand(2);
-  SDValue ARMCC;
+  SDValue ARMcc;
 
   assert(Op.getOpcode() == ISD::SHL_PARTS);
   SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
@@ -2661,9 +2721,9 @@ SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op,
   SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
   SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
   SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, MVT::i32), ISD::SETGE,
-                          ARMCC, DAG, dl);
+                          ARMcc, DAG, dl);
   SDValue Lo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
-  SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, Tmp3, ARMCC,
+  SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, Tmp3, ARMcc,
                            CCR, Cmp);
 
   SDValue Ops[2] = { Lo, Hi };
@@ -2850,13 +2910,11 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) {
 
 /// isNEONModifiedImm - Check if the specified splat value corresponds to a
 /// valid vector constant for a NEON instruction with a "modified immediate"
-/// operand (e.g., VMOV).  If so, return either the constant being
-/// splatted or the encoded value, depending on the DoEncode parameter.
+/// operand (e.g., VMOV).  If so, return the encoded value.
 static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
                                  unsigned SplatBitSize, SelectionDAG &DAG,
-                                 bool isVMOV, bool DoEncode) {
+                                 EVT &VT, bool is128Bits, bool isVMOV) {
   unsigned OpCmode, Imm;
-  EVT VT;
 
   // SplatBitSize is set to the smallest size that splats the vector, so a
   // zero vector will always have SplatBitSize == 8.  However, NEON modified
@@ -2868,16 +2926,18 @@ static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
 
   switch (SplatBitSize) {
   case 8:
+    if (!isVMOV)
+      return SDValue();
     // Any 1-byte value is OK.  Op=0, Cmode=1110.
     assert((SplatBits & ~0xff) == 0 && "one byte splat value is too big");
     OpCmode = 0xe;
     Imm = SplatBits;
-    VT = MVT::i8;
+    VT = is128Bits ? MVT::v16i8 : MVT::v8i8;
     break;
 
   case 16:
     // NEON's 16-bit VMOV supports splat values where only one byte is nonzero.
-    VT = MVT::i16;
+    VT = is128Bits ? MVT::v8i16 : MVT::v4i16;
     if ((SplatBits & ~0xff) == 0) {
       // Value = 0x00nn: Op=x, Cmode=100x.
       OpCmode = 0x8;
@@ -2897,7 +2957,7 @@ static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
     // * only one byte is nonzero, or
     // * the least significant byte is 0xff and the second byte is nonzero, or
     // * the least significant 2 bytes are 0xff and the third is nonzero.
-    VT = MVT::i32;
+    VT = is128Bits ? MVT::v4i32 : MVT::v2i32;
     if ((SplatBits & ~0xff) == 0) {
       // Value = 0x000000nn: Op=x, Cmode=000x.
       OpCmode = 0;
@@ -2949,9 +3009,9 @@ static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
     return SDValue();
 
   case 64: {
-    // NEON has a 64-bit VMOV splat where each byte is either 0 or 0xff.
     if (!isVMOV)
       return SDValue();
+    // NEON has a 64-bit VMOV splat where each byte is either 0 or 0xff.
     uint64_t BitMask = 0xff;
     uint64_t Val = 0;
     unsigned ImmMask = 1;
@@ -2969,7 +3029,7 @@ static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
     // Op=1, Cmode=1110.
     OpCmode = 0x1e;
     SplatBits = Val;
-    VT = MVT::i64;
+    VT = is128Bits ? MVT::v2i64 : MVT::v1i64;
     break;
   }
 
@@ -2978,32 +3038,8 @@ static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
     return SDValue();
   }
 
-  if (DoEncode) {
-    unsigned EncodedVal = ARM_AM::createNEONModImm(OpCmode, Imm);
-    return DAG.getTargetConstant(EncodedVal, MVT::i32);
-  }
-  return DAG.getTargetConstant(SplatBits, VT);
-}
-
-/// getNEONModImm - If this is a valid vector constant for a NEON instruction
-/// with a "modified immediate" operand (e.g., VMOV) of the specified element
-/// size, return the encoded value for that immediate.  The ByteSize field
-/// indicates the number of bytes of each element [1248].
-SDValue ARM::getNEONModImm(SDNode *N, unsigned ByteSize, bool isVMOV,
-                           SelectionDAG &DAG) {
-  BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N);
-  APInt SplatBits, SplatUndef;
-  unsigned SplatBitSize;
-  bool HasAnyUndefs;
-  if (! BVN || ! BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize,
-                                      HasAnyUndefs, ByteSize * 8))
-    return SDValue();
-
-  if (SplatBitSize > ByteSize * 8)
-    return SDValue();
-
-  return isNEONModifiedImm(SplatBits.getZExtValue(), SplatUndef.getZExtValue(),
-                           SplatBitSize, DAG, isVMOV, true);
+  unsigned EncodedVal = ARM_AM::createNEONModImm(OpCmode, Imm);
+  return DAG.getTargetConstant(EncodedVal, MVT::i32);
 }
 
 static bool isVEXTMask(const SmallVectorImpl<int> &M, EVT VT,
@@ -3194,43 +3230,6 @@ static bool isVZIP_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT,
   return true;
 }
 
-
-static SDValue BuildSplat(SDValue Val, EVT VT, SelectionDAG &DAG, DebugLoc dl) {
-  // Canonicalize all-zeros and all-ones vectors.
-  ConstantSDNode *ConstVal = cast<ConstantSDNode>(Val.getNode());
-  if (ConstVal->isNullValue())
-    return getZeroVector(VT, DAG, dl);
-  if (ConstVal->isAllOnesValue())
-    return getOnesVector(VT, DAG, dl);
-
-  EVT CanonicalVT;
-  if (VT.is64BitVector()) {
-    switch (Val.getValueType().getSizeInBits()) {
-    case 8:  CanonicalVT = MVT::v8i8; break;
-    case 16: CanonicalVT = MVT::v4i16; break;
-    case 32: CanonicalVT = MVT::v2i32; break;
-    case 64: CanonicalVT = MVT::v1i64; break;
-    default: llvm_unreachable("unexpected splat element type"); break;
-    }
-  } else {
-    assert(VT.is128BitVector() && "unknown splat vector size");
-    switch (Val.getValueType().getSizeInBits()) {
-    case 8:  CanonicalVT = MVT::v16i8; break;
-    case 16: CanonicalVT = MVT::v8i16; break;
-    case 32: CanonicalVT = MVT::v4i32; break;
-    case 64: CanonicalVT = MVT::v2i64; break;
-    default: llvm_unreachable("unexpected splat element type"); break;
-    }
-  }
-
-  // Build a canonical splat for this value.
-  SmallVector<SDValue, 8> Ops;
-  Ops.assign(CanonicalVT.getVectorNumElements(), Val);
-  SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, dl, CanonicalVT, &Ops[0],
-                            Ops.size());
-  return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Res);
-}
-
 // If this is a case we can't handle, return null and let the default
 // expansion code take care of it.
 static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
@@ -3244,11 +3243,25 @@ static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
   if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
     if (SplatBitSize <= 64) {
       // Check if an immediate VMOV works.
+      EVT VmovVT;
       SDValue Val = isNEONModifiedImm(SplatBits.getZExtValue(),
-                                      SplatUndef.getZExtValue(),
-                                      SplatBitSize, DAG, true, false);
-      if (Val.getNode())
-        return BuildSplat(Val, VT, DAG, dl);
+                                      SplatUndef.getZExtValue(), SplatBitSize,
+                                      DAG, VmovVT, VT.is128BitVector(), true);
+      if (Val.getNode()) {
+        SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, Val);
+        return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Vmov);
+      }
+
+      // Try an immediate VMVN.
+      uint64_t NegatedImm = (SplatBits.getZExtValue() ^
+                             ((1LL << SplatBitSize) - 1));
+      Val = isNEONModifiedImm(NegatedImm,
+                                      SplatUndef.getZExtValue(), SplatBitSize,
+                                      DAG, VmovVT, VT.is128BitVector(), false);
+      if (Val.getNode()) {
+        SDValue Vmov = DAG.getNode(ARMISD::VMVNIMM, dl, VmovVT, Val);
+        return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Vmov);
+      }
     }
   }
 
@@ -3825,6 +3838,15 @@ ARMTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
   return BB;
 }
 
+static
+MachineBasicBlock *OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ) {
+  for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(),
+       E = MBB->succ_end(); I != E; ++I)
+    if (*I != Succ)
+      return *I;
+  llvm_unreachable("Expecting a BB with two successors!");
+}
+
 MachineBasicBlock *
 ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
                                                MachineBasicBlock *BB) const {
@@ -3941,6 +3963,46 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
     return BB;
   }
 
+  case ARM::BCCi64:
+  case ARM::BCCZi64: {
+    // Compare both parts that make up the double comparison separately for
+    // equality.
+    bool RHSisZero = MI->getOpcode() == ARM::BCCZi64;
+
+    unsigned LHS1 = MI->getOperand(1).getReg();
+    unsigned LHS2 = MI->getOperand(2).getReg();
+    if (RHSisZero) {
+      AddDefaultPred(BuildMI(BB, dl,
+                             TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
+                     .addReg(LHS1).addImm(0));
+      BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
+        .addReg(LHS2).addImm(0)
+        .addImm(ARMCC::EQ).addReg(ARM::CPSR);
+    } else {
+      unsigned RHS1 = MI->getOperand(3).getReg();
+      unsigned RHS2 = MI->getOperand(4).getReg();
+      AddDefaultPred(BuildMI(BB, dl,
+                             TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
+                     .addReg(LHS1).addReg(RHS1));
+      BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
+        .addReg(LHS2).addReg(RHS2)
+        .addImm(ARMCC::EQ).addReg(ARM::CPSR);
+    }
+
+    MachineBasicBlock *destMBB = MI->getOperand(RHSisZero ? 3 : 5).getMBB();
+    MachineBasicBlock *exitMBB = OtherSucc(BB, destMBB);
+    if (MI->getOperand(0).getImm() == ARMCC::NE)
+      std::swap(destMBB, exitMBB);
+
+    BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
+      .addMBB(destMBB).addImm(ARMCC::EQ).addReg(ARM::CPSR);
+    BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2B : ARM::B))
+      .addMBB(exitMBB);
+
+    MI->eraseFromParent();   // The pseudo instruction is gone now.
+    return BB;
+  }
+
   case ARM::tANDsp:
   case ARM::tADDspr_:
   case ARM::tSUBspi_:
@@ -4180,6 +4242,35 @@ static SDValue PerformVMOVRRDCombine(SDNode *N,
   return SDValue();
 }
 
+/// PerformVDUPLANECombine - Target-specific dag combine xforms for
+/// ARMISD::VDUPLANE.
+static SDValue PerformVDUPLANECombine(SDNode *N,
+                                      TargetLowering::DAGCombinerInfo &DCI) {
+  // If the source is already a VMOVIMM or VMVNIMM splat, the VDUPLANE is
+  // redundant.
+  SDValue Op = N->getOperand(0);
+  EVT VT = N->getValueType(0);
+
+  // Look through bit_converts to reach the node that produces the value.
+  while (Op.getOpcode() == ISD::BIT_CONVERT)
+    Op = Op.getOperand(0);
+  if (Op.getOpcode() != ARMISD::VMOVIMM && Op.getOpcode() != ARMISD::VMVNIMM)
+    return SDValue();
+
+  // Make sure the VMOV element size is not bigger than the VDUPLANE elements.
+  unsigned EltSize = Op.getValueType().getVectorElementType().getSizeInBits();
+  // The canonical zero VMOV uses 32-bit elements; count it as 8-bit instead.
+  unsigned Imm = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+  unsigned EltBits;
+  if (ARM_AM::decodeNEONModImm(Imm, EltBits) == 0)
+    EltSize = 8;
+  if (EltSize > VT.getVectorElementType().getSizeInBits())
+    return SDValue();
+
+  SDValue Res = DCI.DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), VT, Op);
+  return DCI.CombineTo(N, Res, false);
+}
+
 /// getVShiftImm - Check if this is a valid build_vector for the immediate
 /// operand of a vector shift operation, where all the elements of the
 /// build_vector must have the same constant integer value.
@@ -4558,6 +4649,7 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
   case ISD::SUB:        return PerformSUBCombine(N, DCI);
   case ISD::MUL:        return PerformMULCombine(N, DCI, Subtarget);
   case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI);
+  case ARMISD::VDUPLANE: return PerformVDUPLANECombine(N, DCI);
   case ISD::INTRINSIC_WO_CHAIN: return PerformIntrinsicCombine(N, DCI.DAG);
   case ISD::SHL:
   case ISD::SRA:
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index 3a38669..128b72e 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -53,6 +53,8 @@ namespace llvm {
       CMOV,         // ARM conditional move instructions.
       CNEG,         // ARM conditional negate instructions.
 
+      BCC_i64,
+
       RBIT,         // ARM bitreverse instruction
 
       FTOSI,        // FP to sint within a FP register.
@@ -122,6 +124,10 @@ namespace llvm {
       VGETLANEu,    // zero-extend vector extract element
       VGETLANEs,    // sign-extend vector extract element
 
+      // Vector move immediate and move negated immediate:
+      VMOVIMM,
+      VMVNIMM,
+
       // Vector duplicate:
       VDUP,
       VDUPLANE,
@@ -150,13 +156,6 @@ namespace llvm {
 
   /// Define some predicates that are used for node matching.
   namespace ARM {
-    /// getNEONModImm - If this is a valid vector constant for a NEON
-    /// instruction with a "modified immediate" operand (e.g., VMOV) of the
-    /// specified element size, return the encoded value for that immediate.
-    /// The ByteSize field indicates the number of bytes of each element [1248].
-    SDValue getNEONModImm(SDNode *N, unsigned ByteSize, bool isVMOV,
-                          SelectionDAG &DAG);
-
     /// getVFPf32Imm / getVFPf64Imm - If the given fp immediate can be
     /// materialized with a VMOV.f32 / VMOV.f64 (i.e. fconsts / fconstd)
     /// instruction, returns its 8-bit integer representation. Otherwise,
@@ -363,9 +362,11 @@ namespace llvm {
                   DebugLoc dl, SelectionDAG &DAG) const;
 
     SDValue getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
-                      SDValue &ARMCC, SelectionDAG &DAG, DebugLoc dl) const;
-    SDValue getVFPCmp(SDValue &LHS, SDValue &RHS, ISD::CondCode CC,
-                      SDValue &ARMCC, SelectionDAG &DAG, DebugLoc dl) const;
+                      SDValue &ARMcc, SelectionDAG &DAG, DebugLoc dl) const;
+    SDValue getVFPCmp(SDValue LHS, SDValue RHS,
+                      SelectionDAG &DAG, DebugLoc dl) const;
+
+    SDValue OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const;
 
     MachineBasicBlock *EmitAtomicCmpSwap(MachineInstr *MI,
                                          MachineBasicBlock *BB,
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index c73e204..51fc152 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -38,6 +38,12 @@ def SDT_ARMBr2JT   : SDTypeProfile<0, 4,
                                   [SDTCisPtrTy<0>, SDTCisVT<1, i32>,
                                    SDTCisVT<2, i32>, SDTCisVT<3, i32>]>;
 
+def SDT_ARMBCC_i64 : SDTypeProfile<0, 6,
+                                  [SDTCisVT<0, i32>,
+                                   SDTCisVT<1, i32>, SDTCisVT<2, i32>,
+                                   SDTCisVT<3, i32>, SDTCisVT<4, i32>,
+                                   SDTCisVT<5, OtherVT>]>;
+
 def SDT_ARMCmp     : SDTypeProfile<0, 2, [SDTCisSameAs<0, 1>]>;
 
 def SDT_ARMPICAdd  : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>,
@@ -90,6 +96,9 @@ def ARMbrjt          : SDNode<"ARMISD::BR_JT", SDT_ARMBrJT,
 def ARMbr2jt         : SDNode<"ARMISD::BR2_JT", SDT_ARMBr2JT,
                               [SDNPHasChain]>;
 
+def ARMBcci64        : SDNode<"ARMISD::BCC_i64", SDT_ARMBCC_i64,
+                              [SDNPHasChain]>;
+
 def ARMcmp           : SDNode<"ARMISD::CMP", SDT_ARMCmp,
                               [SDNPOutFlag]>;
 
@@ -1685,13 +1694,19 @@ def RSCSrs : AXI1<0b0111, (outs GPR:$dst), (ins GPR:$a, so_reg:$b),
 }
 
 // (sub X, imm) gets canonicalized to (add X, -imm).  Match this form.
+// The assume-no-carry-in form uses the negation of the input since add/sub
+// assume opposite meanings of the carry flag (i.e., carry == !borrow).
+// See the definition of AddWithCarry() in the ARM ARM A2.2.1 for the gory
+// details.
 def : ARMPat<(add    GPR:$src, so_imm_neg:$imm),
              (SUBri  GPR:$src, so_imm_neg:$imm)>;
-
-//def : ARMPat<(addc   GPR:$src, so_imm_neg:$imm),
-//             (SUBSri GPR:$src, so_imm_neg:$imm)>;
-//def : ARMPat<(adde   GPR:$src, so_imm_neg:$imm),
-//             (SBCri  GPR:$src, so_imm_neg:$imm)>;
+def : ARMPat<(addc   GPR:$src, so_imm_neg:$imm),
+             (SUBSri GPR:$src, so_imm_neg:$imm)>;
+// The with-carry-in form matches bitwise not instead of the negation.
+// Effectively, the inverse interpretation of the carry flag already accounts
+// for part of the negation.
+def : ARMPat<(adde   GPR:$src, so_imm_not:$imm),
+             (SBCri  GPR:$src, so_imm_not:$imm)>;
 
 // Note: These are implemented in C++ code, because they have to generate
 // ADD/SUBrs instructions, which use a complex pattern that a xform function
@@ -2279,6 +2294,22 @@ defm CMNz  : AI1_cmp_irs<0b1011, "cmn",
 def : ARMPat<(ARMcmpZ GPR:$src, so_imm_neg:$imm),
              (CMNzri  GPR:$src, so_imm_neg:$imm)>;
 
+// Pseudo i64 compare-and-branch, used for some floating point compares.
+let usesCustomInserter = 1, isBranch = 1, isTerminator = 1,
+    Defs = [CPSR] in {
+def BCCi64 : PseudoInst<(outs),
+     (ins i32imm:$cc, GPR:$lhs1, GPR:$lhs2, GPR:$rhs1, GPR:$rhs2, brtarget:$dst),
+      IIC_Br,
+     "${:comment} B\t$dst GPR:$lhs1, GPR:$lhs2, GPR:$rhs1, GPR:$rhs2, imm:$cc",
+    [(ARMBcci64 imm:$cc, GPR:$lhs1, GPR:$lhs2, GPR:$rhs1, GPR:$rhs2, bb:$dst)]>;
+
+def BCCZi64 : PseudoInst<(outs),
+     (ins i32imm:$cc, GPR:$lhs1, GPR:$lhs2, brtarget:$dst),
+      IIC_Br,
+     "${:comment} B\t$dst GPR:$lhs1, GPR:$lhs2, 0, 0, imm:$cc",
+    [(ARMBcci64 imm:$cc, GPR:$lhs1, GPR:$lhs2, 0, 0, bb:$dst)]>;
+} // usesCustomInserter
+
 
 // Conditional moves
 // FIXME: should be able to write a pattern for ARMcmov, but can't use
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td
index a84315f..7f7eb98 100644
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -65,6 +65,10 @@ def SDTARMVGETLN  : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisInt<1>,
 def NEONvgetlaneu : SDNode<"ARMISD::VGETLANEu", SDTARMVGETLN>;
 def NEONvgetlanes : SDNode<"ARMISD::VGETLANEs", SDTARMVGETLN>;
 
+def SDTARMVMOVIMM : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVT<1, i32>]>;
+def NEONvmovImm   : SDNode<"ARMISD::VMOVIMM", SDTARMVMOVIMM>;
+def NEONvmvnImm   : SDNode<"ARMISD::VMVNIMM", SDTARMVMOVIMM>;
+
 def NEONvdup      : SDNode<"ARMISD::VDUP", SDTypeProfile<1, 1, [SDTCisVec<0>]>>;
 
 // VDUPLANE can produce a quad-register result from a double-register source,
@@ -94,6 +98,20 @@ def SDTARMFMAX    : SDTypeProfile<1, 2, [SDTCisVT<0, f32>, SDTCisSameAs<0, 1>,
 def NEONfmax      : SDNode<"ARMISD::FMAX", SDTARMFMAX>;
 def NEONfmin      : SDNode<"ARMISD::FMIN", SDTARMFMAX>;
 
+def NEONimmAllZerosV: PatLeaf<(NEONvmovImm (i32 timm)), [{
+  ConstantSDNode *ConstVal = cast<ConstantSDNode>(N->getOperand(0));
+  unsigned EltBits;
+  uint64_t EltVal = ARM_AM::decodeNEONModImm(ConstVal->getZExtValue(), EltBits);
+  return (EltBits == 32 && EltVal == 0);
+}]>;
+
+def NEONimmAllOnesV: PatLeaf<(NEONvmovImm (i32 timm)), [{
+  ConstantSDNode *ConstVal = cast<ConstantSDNode>(N->getOperand(0));
+  unsigned EltBits;
+  uint64_t EltVal = ARM_AM::decodeNEONModImm(ConstVal->getZExtValue(), EltBits);
+  return (EltBits == 8 && EltVal == 0xff);
+}]>;
+
 //===----------------------------------------------------------------------===//
 // NEON operand definitions
 //===----------------------------------------------------------------------===//
@@ -2318,10 +2336,10 @@ defm VTST     : N3V_QHS<0, 0, 0b1000, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
 
 // Vector Bitwise Operations.
 
-def vnot8 : PatFrag<(ops node:$in),
-                    (xor node:$in, (bitconvert (v8i8 immAllOnesV)))>;
-def vnot16 : PatFrag<(ops node:$in),
-                     (xor node:$in, (bitconvert (v16i8 immAllOnesV)))>;
+def vnotd : PatFrag<(ops node:$in),
+                    (xor node:$in, (bitconvert (v8i8 NEONimmAllOnesV)))>;
+def vnotq : PatFrag<(ops node:$in),
+                    (xor node:$in, (bitconvert (v16i8 NEONimmAllOnesV)))>;
 
 
 //   VAND     : Vector Bitwise AND
@@ -2347,36 +2365,58 @@ def  VBICd    : N3VX<0, 0, 0b01, 0b0001, 0, 1, (outs DPR:$dst),
                      (ins DPR:$src1, DPR:$src2), N3RegFrm, IIC_VBINiD,
                      "vbic", "$dst, $src1, $src2", "",
                      [(set DPR:$dst, (v2i32 (and DPR:$src1,
-                                                 (vnot8 DPR:$src2))))]>;
+                                                 (vnotd DPR:$src2))))]>;
 def  VBICq    : N3VX<0, 0, 0b01, 0b0001, 1, 1, (outs QPR:$dst),
                      (ins QPR:$src1, QPR:$src2), N3RegFrm, IIC_VBINiQ,
                      "vbic", "$dst, $src1, $src2", "",
                      [(set QPR:$dst, (v4i32 (and QPR:$src1,
-                                                 (vnot16 QPR:$src2))))]>;
+                                                 (vnotq QPR:$src2))))]>;
 
 //   VORN     : Vector Bitwise OR NOT
 def  VORNd    : N3VX<0, 0, 0b11, 0b0001, 0, 1, (outs DPR:$dst),
                      (ins DPR:$src1, DPR:$src2), N3RegFrm, IIC_VBINiD,
                      "vorn", "$dst, $src1, $src2", "",
                      [(set DPR:$dst, (v2i32 (or DPR:$src1,
-                                                (vnot8 DPR:$src2))))]>;
+                                                (vnotd DPR:$src2))))]>;
 def  VORNq    : N3VX<0, 0, 0b11, 0b0001, 1, 1, (outs QPR:$dst),
                      (ins QPR:$src1, QPR:$src2), N3RegFrm, IIC_VBINiQ,
                      "vorn", "$dst, $src1, $src2", "",
                      [(set QPR:$dst, (v4i32 (or QPR:$src1,
-                                                (vnot16 QPR:$src2))))]>;
+                                                (vnotq QPR:$src2))))]>;
+
+//   VMVN     : Vector Bitwise NOT (Immediate)
+
+let isReMaterializable = 1 in {
+def VMVNv4i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 1, 1, (outs DPR:$dst),
+                         (ins nModImm:$SIMM), IIC_VMOVImm,
+                         "vmvn", "i16", "$dst, $SIMM", "",
+                         [(set DPR:$dst, (v4i16 (NEONvmvnImm timm:$SIMM)))]>;
+def VMVNv8i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 1, 1, (outs QPR:$dst),
+                         (ins nModImm:$SIMM), IIC_VMOVImm,
+                         "vmvn", "i16", "$dst, $SIMM", "",
+                         [(set QPR:$dst, (v8i16 (NEONvmvnImm timm:$SIMM)))]>;
+
+def VMVNv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, 1, 1, (outs DPR:$dst),
+                         (ins nModImm:$SIMM), IIC_VMOVImm,
+                         "vmvn", "i32", "$dst, $SIMM", "",
+                         [(set DPR:$dst, (v2i32 (NEONvmvnImm timm:$SIMM)))]>;
+def VMVNv4i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, 1, 1, (outs QPR:$dst),
+                         (ins nModImm:$SIMM), IIC_VMOVImm,
+                         "vmvn", "i32", "$dst, $SIMM", "",
+                         [(set QPR:$dst, (v4i32 (NEONvmvnImm timm:$SIMM)))]>;
+}
 
 //   VMVN     : Vector Bitwise NOT
 def  VMVNd    : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 0, 0,
                      (outs DPR:$dst), (ins DPR:$src), IIC_VSUBiD,
                      "vmvn", "$dst, $src", "",
-                     [(set DPR:$dst, (v2i32 (vnot8 DPR:$src)))]>;
+                     [(set DPR:$dst, (v2i32 (vnotd DPR:$src)))]>;
 def  VMVNq    : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 1, 0,
                      (outs QPR:$dst), (ins QPR:$src), IIC_VSUBiD,
                      "vmvn", "$dst, $src", "",
-                     [(set QPR:$dst, (v4i32 (vnot16 QPR:$src)))]>;
-def : Pat<(v2i32 (vnot8 DPR:$src)), (VMVNd DPR:$src)>;
-def : Pat<(v4i32 (vnot16 QPR:$src)), (VMVNq QPR:$src)>;
+                     [(set QPR:$dst, (v4i32 (vnotq QPR:$src)))]>;
+def : Pat<(v2i32 (vnotd DPR:$src)), (VMVNd DPR:$src)>;
+def : Pat<(v4i32 (vnotq QPR:$src)), (VMVNq QPR:$src)>;
 
 //   VBSL     : Vector Bitwise Select
 def  VBSLd    : N3VX<1, 0, 0b01, 0b0001, 0, 1, (outs DPR:$dst),
@@ -2385,14 +2425,14 @@ def  VBSLd    : N3VX<1, 0, 0b01, 0b0001, 0, 1, (outs DPR:$dst),
                      "vbsl", "$dst, $src2, $src3", "$src1 = $dst",
                      [(set DPR:$dst,
                        (v2i32 (or (and DPR:$src2, DPR:$src1),
-                                  (and DPR:$src3, (vnot8 DPR:$src1)))))]>;
+                                  (and DPR:$src3, (vnotd DPR:$src1)))))]>;
 def  VBSLq    : N3VX<1, 0, 0b01, 0b0001, 1, 1, (outs QPR:$dst),
                      (ins QPR:$src1, QPR:$src2, QPR:$src3),
                      N3RegFrm, IIC_VCNTiQ,
                      "vbsl", "$dst, $src2, $src3", "$src1 = $dst",
                      [(set QPR:$dst,
                        (v4i32 (or (and QPR:$src2, QPR:$src1),
-                                  (and QPR:$src3, (vnot16 QPR:$src1)))))]>;
+                                  (and QPR:$src3, (vnotq QPR:$src1)))))]>;
 
 //   VBIF     : Vector Bitwise Insert if False
 //              like VBSL but with: "vbif $dst, $src3, $src1", "$src2 = $dst",
@@ -2726,20 +2766,19 @@ defm VQABS    : N2VInt_QHS<0b11, 0b11, 0b00, 0b01110, 0,
 
 // Vector Negate.
 
-def vneg   : PatFrag<(ops node:$in), (sub immAllZerosV, node:$in)>;
-def vneg8  : PatFrag<(ops node:$in),
-                     (sub (bitconvert (v8i8 immAllZerosV)), node:$in)>;
-def vneg16 : PatFrag<(ops node:$in),
-                     (sub (bitconvert (v16i8 immAllZerosV)), node:$in)>;
+def vnegd  : PatFrag<(ops node:$in),
+                     (sub (bitconvert (v2i32 NEONimmAllZerosV)), node:$in)>;
+def vnegq  : PatFrag<(ops node:$in),
+                     (sub (bitconvert (v4i32 NEONimmAllZerosV)), node:$in)>;
 
 class VNEGD<bits<2> size, string OpcodeStr, string Dt, ValueType Ty>
   : N2V<0b11, 0b11, size, 0b01, 0b00111, 0, 0, (outs DPR:$dst), (ins DPR:$src),
         IIC_VSHLiD, OpcodeStr, Dt, "$dst, $src", "",
-        [(set DPR:$dst, (Ty (vneg8 DPR:$src)))]>;
+        [(set DPR:$dst, (Ty (vnegd DPR:$src)))]>;
 class VNEGQ<bits<2> size, string OpcodeStr, string Dt, ValueType Ty>
   : N2V<0b11, 0b11, size, 0b01, 0b00111, 1, 0, (outs QPR:$dst), (ins QPR:$src),
         IIC_VSHLiD, OpcodeStr, Dt, "$dst, $src", "",
-        [(set QPR:$dst, (Ty (vneg16 QPR:$src)))]>;
+        [(set QPR:$dst, (Ty (vnegq QPR:$src)))]>;
 
 //   VNEG     : Vector Negate (integer)
 def  VNEGs8d  : VNEGD<0b00, "vneg", "s8", v8i8>;
@@ -2759,12 +2798,12 @@ def  VNEGf32q : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 1, 0,
                     "vneg", "f32", "$dst, $src", "",
                     [(set QPR:$dst, (v4f32 (fneg QPR:$src)))]>;
 
-def : Pat<(v8i8  (vneg8  DPR:$src)), (VNEGs8d DPR:$src)>;
-def : Pat<(v4i16 (vneg8  DPR:$src)), (VNEGs16d DPR:$src)>;
-def : Pat<(v2i32 (vneg8  DPR:$src)), (VNEGs32d DPR:$src)>;
-def : Pat<(v16i8 (vneg16 QPR:$src)), (VNEGs8q QPR:$src)>;
-def : Pat<(v8i16 (vneg16 QPR:$src)), (VNEGs16q QPR:$src)>;
-def : Pat<(v4i32 (vneg16 QPR:$src)), (VNEGs32q QPR:$src)>;
+def : Pat<(v8i8  (vnegd  DPR:$src)), (VNEGs8d DPR:$src)>;
+def : Pat<(v4i16 (vnegd  DPR:$src)), (VNEGs16d DPR:$src)>;
+def : Pat<(v2i32 (vnegd  DPR:$src)), (VNEGs32d DPR:$src)>;
+def : Pat<(v16i8 (vnegq QPR:$src)), (VNEGs8q QPR:$src)>;
+def : Pat<(v8i16 (vnegq QPR:$src)), (VNEGs16q QPR:$src)>;
+def : Pat<(v4i32 (vnegq QPR:$src)), (VNEGs32q QPR:$src)>;
 
 //   VQNEG    : Vector Saturating Negate
 defm VQNEG    : N2VInt_QHS<0b11, 0b11, 0b00, 0b01111, 0, 
@@ -2818,74 +2857,42 @@ def  VMOVQQQQ : PseudoInst<(outs QQQQPR:$dst), (ins QQQQPR:$src),
 
 //   VMOV     : Vector Move (Immediate)
 
-// VMOV_get_imm8 xform function: convert build_vector to VMOV.i8 imm.
-def VMOV_get_imm8 : SDNodeXForm<build_vector, [{
-  return ARM::getNEONModImm(N, 1, true, *CurDAG);
-}]>;
-def vmovImm8 : PatLeaf<(build_vector), [{
-  return ARM::getNEONModImm(N, 1, true, *CurDAG).getNode() != 0;
-}], VMOV_get_imm8>;
-
-// VMOV_get_imm16 xform function: convert build_vector to VMOV.i16 imm.
-def VMOV_get_imm16 : SDNodeXForm<build_vector, [{
-  return ARM::getNEONModImm(N, 2, true, *CurDAG);
-}]>;
-def vmovImm16 : PatLeaf<(build_vector), [{
-  return ARM::getNEONModImm(N, 2, true, *CurDAG).getNode() != 0;
-}], VMOV_get_imm16>;
-
-// VMOV_get_imm32 xform function: convert build_vector to VMOV.i32 imm.
-def VMOV_get_imm32 : SDNodeXForm<build_vector, [{
-  return ARM::getNEONModImm(N, 4, true, *CurDAG);
-}]>;
-def vmovImm32 : PatLeaf<(build_vector), [{
-  return ARM::getNEONModImm(N, 4, true, *CurDAG).getNode() != 0;
-}], VMOV_get_imm32>;
-
-// VMOV_get_imm64 xform function: convert build_vector to VMOV.i64 imm.
-def VMOV_get_imm64 : SDNodeXForm<build_vector, [{
-  return ARM::getNEONModImm(N, 8, true, *CurDAG);
-}]>;
-def vmovImm64 : PatLeaf<(build_vector), [{
-  return ARM::getNEONModImm(N, 8, true, *CurDAG).getNode() != 0;
-}], VMOV_get_imm64>;
-
 let isReMaterializable = 1 in {
 def VMOVv8i8  : N1ModImm<1, 0b000, 0b1110, 0, 0, 0, 1, (outs DPR:$dst),
                          (ins nModImm:$SIMM), IIC_VMOVImm,
                          "vmov", "i8", "$dst, $SIMM", "",
-                         [(set DPR:$dst, (v8i8 vmovImm8:$SIMM))]>;
+                         [(set DPR:$dst, (v8i8 (NEONvmovImm timm:$SIMM)))]>;
 def VMOVv16i8 : N1ModImm<1, 0b000, 0b1110, 0, 1, 0, 1, (outs QPR:$dst),
                          (ins nModImm:$SIMM), IIC_VMOVImm,
                          "vmov", "i8", "$dst, $SIMM", "",
-                         [(set QPR:$dst, (v16i8 vmovImm8:$SIMM))]>;
+                         [(set QPR:$dst, (v16i8 (NEONvmovImm timm:$SIMM)))]>;
 
 def VMOVv4i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 0, 1, (outs DPR:$dst),
                          (ins nModImm:$SIMM), IIC_VMOVImm,
                          "vmov", "i16", "$dst, $SIMM", "",
-                         [(set DPR:$dst, (v4i16 vmovImm16:$SIMM))]>;
+                         [(set DPR:$dst, (v4i16 (NEONvmovImm timm:$SIMM)))]>;
 def VMOVv8i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 0, 1, (outs QPR:$dst),
                          (ins nModImm:$SIMM), IIC_VMOVImm,
                          "vmov", "i16", "$dst, $SIMM", "",
-                         [(set QPR:$dst, (v8i16 vmovImm16:$SIMM))]>;
+                         [(set QPR:$dst, (v8i16 (NEONvmovImm timm:$SIMM)))]>;
 
-def VMOVv2i32 : N1ModImm<1, 0b000, {0,?,?,0}, 0, 0, 0, 1, (outs DPR:$dst),
+def VMOVv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, 0, 1, (outs DPR:$dst),
                          (ins nModImm:$SIMM), IIC_VMOVImm,
                          "vmov", "i32", "$dst, $SIMM", "",
-                         [(set DPR:$dst, (v2i32 vmovImm32:$SIMM))]>;
-def VMOVv4i32 : N1ModImm<1, 0b000, {0,?,?,0}, 0, 1, 0, 1, (outs QPR:$dst),
+                         [(set DPR:$dst, (v2i32 (NEONvmovImm timm:$SIMM)))]>;
+def VMOVv4i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, 0, 1, (outs QPR:$dst),
                          (ins nModImm:$SIMM), IIC_VMOVImm,
                          "vmov", "i32", "$dst, $SIMM", "",
-                         [(set QPR:$dst, (v4i32 vmovImm32:$SIMM))]>;
+                         [(set QPR:$dst, (v4i32 (NEONvmovImm timm:$SIMM)))]>;
 
 def VMOVv1i64 : N1ModImm<1, 0b000, 0b1110, 0, 0, 1, 1, (outs DPR:$dst),
                          (ins nModImm:$SIMM), IIC_VMOVImm,
                          "vmov", "i64", "$dst, $SIMM", "",
-                         [(set DPR:$dst, (v1i64 vmovImm64:$SIMM))]>;
+                         [(set DPR:$dst, (v1i64 (NEONvmovImm timm:$SIMM)))]>;
 def VMOVv2i64 : N1ModImm<1, 0b000, 0b1110, 0, 1, 1, 1, (outs QPR:$dst),
                          (ins nModImm:$SIMM), IIC_VMOVImm,
                          "vmov", "i64", "$dst, $SIMM", "",
-                         [(set QPR:$dst, (v2i64 vmovImm64:$SIMM))]>;
+                         [(set QPR:$dst, (v2i64 (NEONvmovImm timm:$SIMM)))]>;
 } // isReMaterializable
 
 //   VMOV     : Vector Get Lane (move scalar to ARM core register)
diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td
index 4692f2a..bbe675e 100644
--- a/lib/Target/ARM/ARMInstrThumb2.td
+++ b/lib/Target/ARM/ARMInstrThumb2.td
@@ -122,6 +122,10 @@ def imm0_255_neg : PatLeaf<(i32 imm), [{
   return (uint32_t)(-N->getZExtValue()) < 255;
 }], imm_neg_XFORM>;
 
+def imm0_255_not : PatLeaf<(i32 imm), [{
+  return (uint32_t)(~N->getZExtValue()) < 255;
+}], imm_comp_XFORM>;
+
 // Define Thumb2 specific addressing modes.
 
 // t2addrmode_imm12  := reg + imm12
@@ -1391,13 +1395,32 @@ defm t2RSBS : T2I_rbin_s_is <0b1110, "rsb",
                              BinOpFrag<(subc node:$LHS, node:$RHS)>>;
 
 // (sub X, imm) gets canonicalized to (add X, -imm).  Match this form.
+// The assume-no-carry-in form uses the negation of the input since add/sub
+// assume opposite meanings of the carry flag (i.e., carry == !borrow).
+// See the definition of AddWithCarry() in the ARM ARM A2.2.1 for the gory
+// details.
+// The AddedComplexity prefers the first variant over the others since
+// it can be shrunk to a 16-bit wide encoding, while the others cannot.
+let AddedComplexity = 1 in
+def : T2Pat<(add        GPR:$src, imm0_255_neg:$imm),
+            (t2SUBri    GPR:$src, imm0_255_neg:$imm)>;
+def : T2Pat<(add        GPR:$src, t2_so_imm_neg:$imm),
+            (t2SUBri    GPR:$src, t2_so_imm_neg:$imm)>;
+def : T2Pat<(add        GPR:$src, imm0_4095_neg:$imm),
+            (t2SUBri12  GPR:$src, imm0_4095_neg:$imm)>;
+let AddedComplexity = 1 in
+def : T2Pat<(addc       GPR:$src, imm0_255_neg:$imm),
+            (t2SUBSri   GPR:$src, imm0_255_neg:$imm)>;
+def : T2Pat<(addc       GPR:$src, t2_so_imm_neg:$imm),
+            (t2SUBSri   GPR:$src, t2_so_imm_neg:$imm)>;
+// The with-carry-in form matches bitwise not instead of the negation.
+// Effectively, the inverse interpretation of the carry flag already accounts
+// for part of the negation.
 let AddedComplexity = 1 in
-def : T2Pat<(add       GPR:$src, imm0_255_neg:$imm),
-            (t2SUBri   GPR:$src, imm0_255_neg:$imm)>;
-def : T2Pat<(add       GPR:$src, t2_so_imm_neg:$imm),
-            (t2SUBri   GPR:$src, t2_so_imm_neg:$imm)>;
-def : T2Pat<(add       GPR:$src, imm0_4095_neg:$imm),
-            (t2SUBri12 GPR:$src, imm0_4095_neg:$imm)>;
+def : T2Pat<(adde       GPR:$src, imm0_255_not:$imm),
+            (t2SBCSri   GPR:$src, imm0_255_not:$imm)>;
+def : T2Pat<(adde       GPR:$src, t2_so_imm_not:$imm),
+            (t2SBCSri   GPR:$src, t2_so_imm_not:$imm)>;
 
 // Select Bytes -- for disassembly only
 
@@ -2435,7 +2458,7 @@ let isReturn = 1, isTerminator = 1, isBarrier = 1, mayLoad = 1,
     hasExtraDefRegAllocReq = 1 in
   def t2LDM_RET : T2XIt<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p,
                                          reglist:$dsts, variable_ops), IIC_Br,
-                        "ldm${addr:submode}${p}${addr:wide}\t$addr, $dsts",
+                        "ldm${addr:submode}${p}${addr:wide}\t$addr!, $dsts",
                         "$addr.addr = $wb", []> {
   let Inst{31-27} = 0b11101;
   let Inst{26-25} = 0b00;
diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h
index 8332bba..e7d92ed 100644
--- a/lib/Target/ARM/ARMSubtarget.h
+++ b/lib/Target/ARM/ARMSubtarget.h
@@ -54,6 +54,9 @@ protected:
   /// the VML[AS] instructions are slow (if so, don't use them).
   bool SlowVMLx;
 
+  /// SlowFPBrcc - True if floating point compare + branch is slow.
+  bool SlowFPBrcc;
+
   /// IsThumb - True if we are in thumb mode, false if in ARM mode.
   bool IsThumb;
 
@@ -133,6 +136,7 @@ protected:
   bool hasDivide() const { return HasHardwareDivide; }
   bool hasT2ExtractPack() const { return HasT2ExtractPack; }
   bool useVMLx() const {return hasVFP2() && !SlowVMLx; }
+  bool isFPBrccSlow() const { return SlowFPBrcc; }
 
   bool hasFP16() const { return HasFP16; }
 
diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index 8415d1a..4b08324 100644
--- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -88,7 +88,7 @@ private:
   /// its register number, or -1 if there is no match.  To allow return values
   /// to be used directly in register lists, arm registers have values between
   /// 0 and 15.
-  int MatchRegisterName(const StringRef &Name);
+  int MatchRegisterName(StringRef Name);
 
   /// }
 
@@ -97,7 +97,7 @@ public:
   ARMAsmParser(const Target &T, MCAsmParser &_Parser)
     : TargetAsmParser(T), Parser(_Parser) {}
 
-  virtual bool ParseInstruction(const StringRef &Name, SMLoc NameLoc,
+  virtual bool ParseInstruction(StringRef Name, SMLoc NameLoc,
                                 SmallVectorImpl<MCParsedAsmOperand*> &Operands);
 
   virtual bool ParseDirective(AsmToken DirectiveID);
@@ -517,7 +517,7 @@ bool ARMAsmParser::ParseShift(ShiftType &St,
   const AsmToken &Tok = Parser.getTok();
   if (Tok.isNot(AsmToken::Identifier))
     return true;
-  const StringRef &ShiftName = Tok.getString();
+  StringRef ShiftName = Tok.getString();
   if (ShiftName == "lsl" || ShiftName == "LSL")
     St = Lsl;
   else if (ShiftName == "lsr" || ShiftName == "LSR")
@@ -549,7 +549,7 @@ bool ARMAsmParser::ParseShift(ShiftType &St,
 }
 
 /// A hack to allow some testing, to be replaced by a real table gen version.
-int ARMAsmParser::MatchRegisterName(const StringRef &Name) {
+int ARMAsmParser::MatchRegisterName(StringRef Name) {
   if (Name == "r0" || Name == "R0")
     return 0;
   else if (Name == "r1" || Name == "R1")
@@ -593,7 +593,7 @@ MatchInstruction(const SmallVectorImpl<MCParsedAsmOperand*> &Operands,
                  MCInst &Inst) {
   ARMOperand &Op0 = *(ARMOperand*)Operands[0];
   assert(Op0.Kind == ARMOperand::Token && "First operand not a Token");
-  const StringRef &Mnemonic = Op0.getToken();
+  StringRef Mnemonic = Op0.getToken();
   if (Mnemonic == "add" ||
       Mnemonic == "stmfd" ||
       Mnemonic == "str" ||
@@ -658,7 +658,7 @@ bool ARMAsmParser::ParseOperand(OwningPtr<ARMOperand> &Op) {
 }
 
 /// Parse an arm instruction mnemonic followed by its operands.
-bool ARMAsmParser::ParseInstruction(const StringRef &Name, SMLoc NameLoc,
+bool ARMAsmParser::ParseInstruction(StringRef Name, SMLoc NameLoc,
                                SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
   OwningPtr<ARMOperand> Op;
   ARMOperand::CreateToken(Op, Name, NameLoc);
@@ -761,7 +761,7 @@ bool ARMAsmParser::ParseDirectiveSyntax(SMLoc L) {
   const AsmToken &Tok = Parser.getTok();
   if (Tok.isNot(AsmToken::Identifier))
     return Error(L, "unexpected token in .syntax directive");
-  const StringRef &Mode = Tok.getString();
+  StringRef Mode = Tok.getString();
   if (Mode == "unified" || Mode == "UNIFIED")
     Parser.Lex();
   else if (Mode == "divided" || Mode == "DIVIDED")
diff --git a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp
index 6a40cf3..946f474 100644
--- a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp
+++ b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp
@@ -602,12 +602,8 @@ void ARMAsmPrinter::printAddrMode6Operand(const MachineInstr *MI, int Op,
 
   O << "[" << getRegisterName(MO1.getReg());
   if (MO2.getImm()) {
-    unsigned Align = MO2.getImm();
-    assert((Align == 8 || Align == 16 || Align == 32) &&
-           "unexpected NEON load/store alignment");
-    Align <<= 3;
     // FIXME: Both darwin as and GNU as violate ARM docs here.
-    O << ", :" << Align;
+    O << ", :" << (MO2.getImm() << 3);
   }
   O << "]";
 }
diff --git a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp
index 170819a..edc9345 100644
--- a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp
+++ b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp
@@ -442,7 +442,7 @@ void ARMInstPrinter::printAddrMode6Operand(const MCInst *MI, unsigned OpNum,
   O << "[" << getRegisterName(MO1.getReg());
   if (MO2.getImm()) {
     // FIXME: Both darwin as and GNU as violate ARM docs here.
-    O << ", :" << MO2.getImm();
+    O << ", :" << (MO2.getImm() << 3);
   }
   O << "]";
 }
diff --git a/lib/Target/ARM/README.txt b/lib/Target/ARM/README.txt
index 85d5ca0..0cb8ff0 100644
--- a/lib/Target/ARM/README.txt
+++ b/lib/Target/ARM/README.txt
@@ -590,3 +590,70 @@ than the Z bit, we'll need additional logic to reverse the conditionals
 associated with the comparison. Perhaps a pseudo-instruction for the comparison,
 with a post-codegen pass to clean up and handle the condition codes?
 See PR5694 for testcase.
+
+//===---------------------------------------------------------------------===//
+
+Given the following on armv5:
+int test1(int A, int B) {
+  return (A&-8388481)|(B&8388480);
+}
+
+We currently generate:
+	ldr	r2, .LCPI0_0
+	and	r0, r0, r2
+	ldr	r2, .LCPI0_1
+	and	r1, r1, r2
+	orr	r0, r1, r0
+	bx	lr
+
+We should be able to replace the second ldr+and with a bic (i.e. reuse the
+constant which was already loaded).  Not sure what's necessary to do that.
+
+//===---------------------------------------------------------------------===//
+
+Given the following on ARMv7:
+int test1(int A, int B) {
+  return (A&-8388481)|(B&8388480);
+}
+
+We currently generate:
+	bfc	r0, #7, #16
+	movw	r2, #:lower16:8388480
+	movt	r2, #:upper16:8388480
+	and	r1, r1, r2
+	orr	r0, r1, r0
+	bx	lr
+
+The following is much shorter:
+	lsr	r1, r1, #7
+	bfi	r0, r1, #7, #16
+	bx	lr
+
+
+//===---------------------------------------------------------------------===//
+
+The code generated for bswap on armv4/5 (CPUs without rev) is less than ideal:
+
+int a(int x) { return __builtin_bswap32(x); }
+
+a:
+	mov	r1, #255, 24
+	mov	r2, #255, 16
+	and	r1, r1, r0, lsr #8
+	and	r2, r2, r0, lsl #8
+	orr	r1, r1, r0, lsr #24
+	orr	r0, r2, r0, lsl #24
+	orr	r0, r0, r1
+	bx	lr
+
+Something like the following would be better (fewer instructions/registers):
+	eor     r1, r0, r0, ror #16
+	bic     r1, r1, #0xff0000
+	mov     r1, r1, lsr #8
+	eor     r0, r1, r0, ror #8
+	bx	lr
+
+A custom Thumb version would also be a slight improvement over the generic
+version.
+
+//===---------------------------------------------------------------------===//
diff --git a/lib/Target/Alpha/AlphaMCAsmInfo.cpp b/lib/Target/Alpha/AlphaMCAsmInfo.cpp
index c67c6a2..a35e884 100644
--- a/lib/Target/Alpha/AlphaMCAsmInfo.cpp
+++ b/lib/Target/Alpha/AlphaMCAsmInfo.cpp
@@ -14,7 +14,7 @@
 #include "AlphaMCAsmInfo.h"
 using namespace llvm;
 
-AlphaMCAsmInfo::AlphaMCAsmInfo(const Target &T, const StringRef &TT) {
+AlphaMCAsmInfo::AlphaMCAsmInfo(const Target &T, StringRef TT) {
   AlignmentIsInBytes = false;
   PrivateGlobalPrefix = "$";
   GPRel32Directive = ".gprel32";
diff --git a/lib/Target/Alpha/AlphaMCAsmInfo.h b/lib/Target/Alpha/AlphaMCAsmInfo.h
index c27065d..837844b 100644
--- a/lib/Target/Alpha/AlphaMCAsmInfo.h
+++ b/lib/Target/Alpha/AlphaMCAsmInfo.h
@@ -14,14 +14,14 @@
 #ifndef ALPHATARGETASMINFO_H
 #define ALPHATARGETASMINFO_H
 
+#include "llvm/ADT/StringRef.h"
 #include "llvm/MC/MCAsmInfo.h"
 
 namespace llvm {
   class Target;
-  class StringRef;
 
   struct AlphaMCAsmInfo : public MCAsmInfo {
-    explicit AlphaMCAsmInfo(const Target &T, const StringRef &TT);
+    explicit AlphaMCAsmInfo(const Target &T, StringRef TT);
   };
 
 } // namespace llvm
diff --git a/lib/Target/Blackfin/BlackfinMCAsmInfo.cpp b/lib/Target/Blackfin/BlackfinMCAsmInfo.cpp
index 31470fb..5b9d4a2 100644
--- a/lib/Target/Blackfin/BlackfinMCAsmInfo.cpp
+++ b/lib/Target/Blackfin/BlackfinMCAsmInfo.cpp
@@ -15,7 +15,7 @@
 
 using namespace llvm;
 
-BlackfinMCAsmInfo::BlackfinMCAsmInfo(const Target &T, const StringRef &TT) {
+BlackfinMCAsmInfo::BlackfinMCAsmInfo(const Target &T, StringRef TT) {
   GlobalPrefix = "_";
   CommentString = "//";
   HasSetDirective = false;
diff --git a/lib/Target/Blackfin/BlackfinMCAsmInfo.h b/lib/Target/Blackfin/BlackfinMCAsmInfo.h
index 0efc295..c372aa2 100644
--- a/lib/Target/Blackfin/BlackfinMCAsmInfo.h
+++ b/lib/Target/Blackfin/BlackfinMCAsmInfo.h
@@ -14,14 +14,14 @@
 #ifndef BLACKFINTARGETASMINFO_H
 #define BLACKFINTARGETASMINFO_H
 
+#include "llvm/ADT/StringRef.h"
 #include "llvm/MC/MCAsmInfo.h"
 
 namespace llvm {
   class Target;
-  class StringRef;
 
   struct BlackfinMCAsmInfo : public MCAsmInfo {
-    explicit BlackfinMCAsmInfo(const Target &T, const StringRef &TT);
+    explicit BlackfinMCAsmInfo(const Target &T, StringRef TT);
   };
 
 } // namespace llvm
diff --git a/lib/Target/CellSPU/SPUMCAsmInfo.cpp b/lib/Target/CellSPU/SPUMCAsmInfo.cpp
index 68445cf..25ba88a 100644
--- a/lib/Target/CellSPU/SPUMCAsmInfo.cpp
+++ b/lib/Target/CellSPU/SPUMCAsmInfo.cpp
@@ -14,7 +14,7 @@
 #include "SPUMCAsmInfo.h"
 using namespace llvm;
 
-SPULinuxMCAsmInfo::SPULinuxMCAsmInfo(const Target &T, const StringRef &TT) {
+SPULinuxMCAsmInfo::SPULinuxMCAsmInfo(const Target &T, StringRef TT) {
   ZeroDirective = "\t.space\t";
   Data64bitsDirective = "\t.quad\t";
   AlignmentIsInBytes = false;
diff --git a/lib/Target/CellSPU/SPUMCAsmInfo.h b/lib/Target/CellSPU/SPUMCAsmInfo.h
index 8d75ea8..7f850d3 100644
--- a/lib/Target/CellSPU/SPUMCAsmInfo.h
+++ b/lib/Target/CellSPU/SPUMCAsmInfo.h
@@ -14,14 +14,14 @@
 #ifndef SPUTARGETASMINFO_H
 #define SPUTARGETASMINFO_H
 
+#include "llvm/ADT/StringRef.h"
 #include "llvm/MC/MCAsmInfo.h"
 
 namespace llvm {
   class Target;
-  class StringRef;
   
   struct SPULinuxMCAsmInfo : public MCAsmInfo {
-    explicit SPULinuxMCAsmInfo(const Target &T, const StringRef &TT);
+    explicit SPULinuxMCAsmInfo(const Target &T, StringRef TT);
   };
 } // namespace llvm
 
diff --git a/lib/Target/MBlaze/MBlazeMCAsmInfo.cpp b/lib/Target/MBlaze/MBlazeMCAsmInfo.cpp
index 7ae465d..4abeb2e 100644
--- a/lib/Target/MBlaze/MBlazeMCAsmInfo.cpp
+++ b/lib/Target/MBlaze/MBlazeMCAsmInfo.cpp
@@ -14,7 +14,7 @@
 #include "MBlazeMCAsmInfo.h"
 using namespace llvm;
 
-MBlazeMCAsmInfo::MBlazeMCAsmInfo(const Target &T, const StringRef &TT) {
+MBlazeMCAsmInfo::MBlazeMCAsmInfo(const Target &T, StringRef TT) {
   AlignmentIsInBytes          = false;
   Data16bitsDirective         = "\t.half\t";
   Data32bitsDirective         = "\t.word\t";
diff --git a/lib/Target/MBlaze/MBlazeMCAsmInfo.h b/lib/Target/MBlaze/MBlazeMCAsmInfo.h
index bccb418..9d6ff3a 100644
--- a/lib/Target/MBlaze/MBlazeMCAsmInfo.h
+++ b/lib/Target/MBlaze/MBlazeMCAsmInfo.h
@@ -14,15 +14,15 @@
 #ifndef MBLAZETARGETASMINFO_H
 #define MBLAZETARGETASMINFO_H
 
+#include "llvm/ADT/StringRef.h"
 #include "llvm/MC/MCAsmInfo.h"
 
 namespace llvm {
   class Target;
-  class StringRef;
   
   class MBlazeMCAsmInfo : public MCAsmInfo {
   public:
-    explicit MBlazeMCAsmInfo(const Target &T, const StringRef &TT);
+    explicit MBlazeMCAsmInfo(const Target &T, StringRef TT);
   };
 
 } // namespace llvm
diff --git a/lib/Target/MSIL/MSILWriter.cpp b/lib/Target/MSIL/MSILWriter.cpp
index 8f97d25..cc350e8 100644
--- a/lib/Target/MSIL/MSILWriter.cpp
+++ b/lib/Target/MSIL/MSILWriter.cpp
@@ -1621,8 +1621,7 @@ const char* MSILWriter::getLibraryName(const GlobalVariable* GV) {
 }
 
 
-const char* MSILWriter::getLibraryForSymbol(const StringRef &Name, 
-                                            bool isFunction,
+const char* MSILWriter::getLibraryForSymbol(StringRef Name, bool isFunction,
                                             CallingConv::ID CallingConv) {
   // TODO: Read *.def file with function and libraries definitions.
   return "MSVCRT.DLL";  
diff --git a/lib/Target/MSIL/MSILWriter.h b/lib/Target/MSIL/MSILWriter.h
index a95ae23..92a3abe 100644
--- a/lib/Target/MSIL/MSILWriter.h
+++ b/lib/Target/MSIL/MSILWriter.h
@@ -246,7 +246,7 @@ namespace llvm {
 
     const char* getLibraryName(const GlobalVariable* GV); 
     
-    const char* getLibraryForSymbol(const StringRef &Name, bool isFunction,
+    const char* getLibraryForSymbol(StringRef Name, bool isFunction,
                                     CallingConv::ID CallingConv);
 
     void printExternals();
diff --git a/lib/Target/MSP430/MSP430MCAsmInfo.cpp b/lib/Target/MSP430/MSP430MCAsmInfo.cpp
index cfb499d..3f44944 100644
--- a/lib/Target/MSP430/MSP430MCAsmInfo.cpp
+++ b/lib/Target/MSP430/MSP430MCAsmInfo.cpp
@@ -14,7 +14,7 @@
 #include "MSP430MCAsmInfo.h"
 using namespace llvm;
 
-MSP430MCAsmInfo::MSP430MCAsmInfo(const Target &T, const StringRef &TT) {
+MSP430MCAsmInfo::MSP430MCAsmInfo(const Target &T, StringRef TT) {
   PrivateGlobalPrefix = ".L";
   WeakRefDirective ="\t.weak\t";
   PCSymbol=".";
diff --git a/lib/Target/MSP430/MSP430MCAsmInfo.h b/lib/Target/MSP430/MSP430MCAsmInfo.h
index 8318029..f3138a2 100644
--- a/lib/Target/MSP430/MSP430MCAsmInfo.h
+++ b/lib/Target/MSP430/MSP430MCAsmInfo.h
@@ -14,13 +14,14 @@
 #ifndef MSP430TARGETASMINFO_H
 #define MSP430TARGETASMINFO_H
 
+#include "llvm/ADT/StringRef.h"
 #include "llvm/MC/MCAsmInfo.h"
 
 namespace llvm {
   class Target;
-  class StringRef;
+
   struct MSP430MCAsmInfo : public MCAsmInfo {
-    explicit MSP430MCAsmInfo(const Target &T, const StringRef &TT);
+    explicit MSP430MCAsmInfo(const Target &T, StringRef TT);
   };
 
 } // namespace llvm
diff --git a/lib/Target/Mips/MipsMCAsmInfo.cpp b/lib/Target/Mips/MipsMCAsmInfo.cpp
index 89e3e11..fe48ab7 100644
--- a/lib/Target/Mips/MipsMCAsmInfo.cpp
+++ b/lib/Target/Mips/MipsMCAsmInfo.cpp
@@ -14,7 +14,7 @@
 #include "MipsMCAsmInfo.h"
 using namespace llvm;
 
-MipsMCAsmInfo::MipsMCAsmInfo(const Target &T, const StringRef &TT) {
+MipsMCAsmInfo::MipsMCAsmInfo(const Target &T, StringRef TT) {
   AlignmentIsInBytes          = false;
   Data16bitsDirective         = "\t.half\t";
   Data32bitsDirective         = "\t.word\t";
diff --git a/lib/Target/Mips/MipsMCAsmInfo.h b/lib/Target/Mips/MipsMCAsmInfo.h
index 33a4b5e..15a867e 100644
--- a/lib/Target/Mips/MipsMCAsmInfo.h
+++ b/lib/Target/Mips/MipsMCAsmInfo.h
@@ -14,15 +14,15 @@
 #ifndef MIPSTARGETASMINFO_H
 #define MIPSTARGETASMINFO_H
 
+#include "llvm/ADT/StringRef.h"
 #include "llvm/MC/MCAsmInfo.h"
 
 namespace llvm {
   class Target;
-  class StringRef;
   
   class MipsMCAsmInfo : public MCAsmInfo {
   public:
-    explicit MipsMCAsmInfo(const Target &T, const StringRef &TT);
+    explicit MipsMCAsmInfo(const Target &T, StringRef TT);
   };
 
 } // namespace llvm
diff --git a/lib/Target/PIC16/PIC16DebugInfo.cpp b/lib/Target/PIC16/PIC16DebugInfo.cpp
index 6a4d0d6..7a948de 100644
--- a/lib/Target/PIC16/PIC16DebugInfo.cpp
+++ b/lib/Target/PIC16/PIC16DebugInfo.cpp
@@ -416,7 +416,7 @@ void PIC16DbgInfo::EmitAuxEntry(const std::string VarName, int Aux[], int Num,
   if (!TagName.empty()) Tmp += ", " + TagName;
   
   for (int i = 0; i<Num; i++)
-    Tmp += "," + utostr(Aux[i] && 0xff);
+    Tmp += "," + utostr(Aux[i] & 0xff);
   
   OS.EmitRawText("\n\t.dim " + Twine(VarName) + ", 1" + Tmp);
 }
diff --git a/lib/Target/PIC16/PIC16MCAsmInfo.cpp b/lib/Target/PIC16/PIC16MCAsmInfo.cpp
index b080542..1bcc497 100644
--- a/lib/Target/PIC16/PIC16MCAsmInfo.cpp
+++ b/lib/Target/PIC16/PIC16MCAsmInfo.cpp
@@ -20,7 +20,7 @@
 #include "PIC16ISelLowering.h"
 using namespace llvm;
 
-PIC16MCAsmInfo::PIC16MCAsmInfo(const Target &T, const StringRef &TT) {
+PIC16MCAsmInfo::PIC16MCAsmInfo(const Target &T, StringRef TT) {
   CommentString = ";";
   GlobalPrefix = PAN::getTagName(PAN::PREFIX_SYMBOL);
   GlobalDirective = "\tglobal\t";
diff --git a/lib/Target/PIC16/PIC16MCAsmInfo.h b/lib/Target/PIC16/PIC16MCAsmInfo.h
index e84db85..6e1c111 100644
--- a/lib/Target/PIC16/PIC16MCAsmInfo.h
+++ b/lib/Target/PIC16/PIC16MCAsmInfo.h
@@ -25,7 +25,7 @@ namespace llvm {
     const char *RomData16bitsDirective;
     const char *RomData32bitsDirective;
   public:    
-    PIC16MCAsmInfo(const Target &T, const StringRef &TT);
+    PIC16MCAsmInfo(const Target &T, StringRef TT);
     
     virtual const char *getDataASDirective(unsigned size, unsigned AS) const;
   };
diff --git a/lib/Target/Sparc/SparcMCAsmInfo.cpp b/lib/Target/Sparc/SparcMCAsmInfo.cpp
index 535c6f7..d37d6d2 100644
--- a/lib/Target/Sparc/SparcMCAsmInfo.cpp
+++ b/lib/Target/Sparc/SparcMCAsmInfo.cpp
@@ -12,10 +12,9 @@
 //===----------------------------------------------------------------------===//
 
 #include "SparcMCAsmInfo.h"
-#include "llvm/ADT/SmallVector.h"
 using namespace llvm;
 
-SparcELFMCAsmInfo::SparcELFMCAsmInfo(const Target &T, const StringRef &TT) {
+SparcELFMCAsmInfo::SparcELFMCAsmInfo(const Target &T, StringRef TT) {
   Data16bitsDirective = "\t.half\t";
   Data32bitsDirective = "\t.word\t";
   Data64bitsDirective = 0;  // .xword is only supported by V9.
diff --git a/lib/Target/Sparc/SparcMCAsmInfo.h b/lib/Target/Sparc/SparcMCAsmInfo.h
index 12d6ef4..0cb6827 100644
--- a/lib/Target/Sparc/SparcMCAsmInfo.h
+++ b/lib/Target/Sparc/SparcMCAsmInfo.h
@@ -14,13 +14,14 @@
 #ifndef SPARCTARGETASMINFO_H
 #define SPARCTARGETASMINFO_H
 
+#include "llvm/ADT/StringRef.h"
 #include "llvm/MC/MCAsmInfo.h"
 
 namespace llvm {
   class Target;
-  class StringRef;
+
   struct SparcELFMCAsmInfo : public MCAsmInfo {
-    explicit SparcELFMCAsmInfo(const Target &T, const StringRef &TT);
+    explicit SparcELFMCAsmInfo(const Target &T, StringRef TT);
   };
 
 } // namespace llvm
diff --git a/lib/Target/SystemZ/SystemZMCAsmInfo.cpp b/lib/Target/SystemZ/SystemZMCAsmInfo.cpp
index f9ccc47..4f7f70b 100644
--- a/lib/Target/SystemZ/SystemZMCAsmInfo.cpp
+++ b/lib/Target/SystemZ/SystemZMCAsmInfo.cpp
@@ -16,7 +16,7 @@
 #include "llvm/MC/MCSectionELF.h"
 using namespace llvm;
 
-SystemZMCAsmInfo::SystemZMCAsmInfo(const Target &T, const StringRef &TT) {
+SystemZMCAsmInfo::SystemZMCAsmInfo(const Target &T, StringRef TT) {
   PrivateGlobalPrefix = ".L";
   WeakRefDirective = "\t.weak\t";
   PCSymbol = ".";
diff --git a/lib/Target/SystemZ/SystemZMCAsmInfo.h b/lib/Target/SystemZ/SystemZMCAsmInfo.h
index 87908f2..a6a27e2 100644
--- a/lib/Target/SystemZ/SystemZMCAsmInfo.h
+++ b/lib/Target/SystemZ/SystemZMCAsmInfo.h
@@ -21,7 +21,7 @@ namespace llvm {
   class StringRef;
 
   struct SystemZMCAsmInfo : public MCAsmInfo {
-    explicit SystemZMCAsmInfo(const Target &T, const StringRef &TT);
+    explicit SystemZMCAsmInfo(const Target &T, StringRef TT);
     virtual const MCSection *getNonexecutableStackSection(MCContext &Ctx) const;
   };
   
diff --git a/lib/Target/TargetMachine.cpp b/lib/Target/TargetMachine.cpp
index df52368..47c91df 100644
--- a/lib/Target/TargetMachine.cpp
+++ b/lib/Target/TargetMachine.cpp
@@ -294,7 +294,7 @@ namespace llvm {
   /// option is specified on the command line. If this returns false (default),
   /// the code generator is not allowed to assume that FP arithmetic arguments
   /// and results are never NaNs or +-Infs.
-  bool FiniteOnlyFPMath() { return UnsafeFPMath || FiniteOnlyFPMathOption; }
+  bool FiniteOnlyFPMath() { return FiniteOnlyFPMathOption; }
   
   /// HonorSignDependentRoundingFPMath - Return true if the codegen must assume
   /// that the rounding mode of the FPU can change from its default.
diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp
index a856e9c..f1e66ab 100644
--- a/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -65,7 +65,7 @@ public:
   X86ATTAsmParser(const Target &T, MCAsmParser &_Parser)
     : TargetAsmParser(T), Parser(_Parser) {}
 
-  virtual bool ParseInstruction(const StringRef &Name, SMLoc NameLoc,
+  virtual bool ParseInstruction(StringRef Name, SMLoc NameLoc,
                                 SmallVectorImpl<MCParsedAsmOperand*> &Operands);
 
   virtual bool ParseDirective(AsmToken DirectiveID);
@@ -602,7 +602,7 @@ X86Operand *X86ATTAsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) {
 }
 
 bool X86ATTAsmParser::
-ParseInstruction(const StringRef &Name, SMLoc NameLoc,
+ParseInstruction(StringRef Name, SMLoc NameLoc,
                  SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
   // The various flavors of pushf and popf use Requires<In32BitMode> and
   // Requires<In64BitMode>, but the assembler doesn't yet implement that.
@@ -612,6 +612,8 @@ ParseInstruction(const StringRef &Name, SMLoc NameLoc,
       return Error(NameLoc, "popfl cannot be encoded in 64-bit mode");
     else if (Name == "pushfl")
       return Error(NameLoc, "pushfl cannot be encoded in 64-bit mode");
+    else if (Name == "pusha")
+      return Error(NameLoc, "pusha cannot be encoded in 64-bit mode");
   } else {
     if (Name == "popfq")
       return Error(NameLoc, "popfq cannot be encoded in 32-bit mode");
diff --git a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp b/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp
index 73bc603..08e6486 100644
--- a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp
+++ b/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp
@@ -17,7 +17,6 @@
 #include "X86IntelInstPrinter.h"
 #include "X86MCInstLower.h"
 #include "X86.h"
-#include "X86COFF.h"
 #include "X86COFFMachineModuleInfo.h"
 #include "X86MachineFunctionInfo.h"
 #include "X86TargetMachine.h"
@@ -35,6 +34,7 @@
 #include "llvm/CodeGen/MachineJumpTableInfo.h"
 #include "llvm/CodeGen/MachineModuleInfoImpls.h"
 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/Support/COFF.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Target/Mangler.h"
 #include "llvm/Target/TargetOptions.h"
@@ -60,8 +60,10 @@ bool X86AsmPrinter::runOnMachineFunction(MachineFunction &MF) {
   if (Subtarget->isTargetCOFF()) {
     bool Intrn = MF.getFunction()->hasInternalLinkage();
     OutStreamer.BeginCOFFSymbolDef(CurrentFnSym);
-    OutStreamer.EmitCOFFSymbolStorageClass(Intrn ? COFF::C_STAT : COFF::C_EXT);
-    OutStreamer.EmitCOFFSymbolType(COFF::DT_FCN << COFF::N_BTSHFT);
+    OutStreamer.EmitCOFFSymbolStorageClass(Intrn ? COFF::IMAGE_SYM_CLASS_STATIC 
+                                              : COFF::IMAGE_SYM_CLASS_EXTERNAL);
+    OutStreamer.EmitCOFFSymbolType(COFF::IMAGE_SYM_DTYPE_FUNCTION
+                                               << COFF::SCT_COMPLEX_TYPE_SHIFT);
     OutStreamer.EndCOFFSymbolDef();
   }
 
@@ -582,8 +584,9 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) {
                             E = COFFMMI.externals_end();
                             I != E; ++I) {
       OutStreamer.BeginCOFFSymbolDef(CurrentFnSym);
-      OutStreamer.EmitCOFFSymbolStorageClass(COFF::C_EXT);
-      OutStreamer.EmitCOFFSymbolType(COFF::DT_FCN << COFF::N_BTSHFT);
+      OutStreamer.EmitCOFFSymbolStorageClass(COFF::IMAGE_SYM_CLASS_EXTERNAL);
+      OutStreamer.EmitCOFFSymbolType(COFF::IMAGE_SYM_DTYPE_FUNCTION
+                                               << COFF::SCT_COMPLEX_TYPE_SHIFT);
       OutStreamer.EndCOFFSymbolDef();
     }
 
diff --git a/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp b/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp
index 09f150b..e67fc06 100644
--- a/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp
+++ b/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp
@@ -154,15 +154,13 @@ MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO,
       
   case X86II::MO_TLVP:      RefKind = MCSymbolRefExpr::VK_TLVP; break;
   case X86II::MO_TLVP_PIC_BASE:
-      Expr = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_TLVP, Ctx);
-      // Subtract the pic base.
-      Expr 
-        = MCBinaryExpr::CreateSub(Expr,
-                                  MCSymbolRefExpr::Create(GetPICBaseSymbol(),
-                                                          Ctx),
-                                  Ctx);
-  
-      break;
+    Expr = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_TLVP, Ctx);
+    // Subtract the pic base.
+    Expr = MCBinaryExpr::CreateSub(Expr,
+                                   MCSymbolRefExpr::Create(GetPICBaseSymbol(),
+                                                           Ctx),
+                                   Ctx);
+    break;
   case X86II::MO_TLSGD:     RefKind = MCSymbolRefExpr::VK_TLSGD; break;
   case X86II::MO_GOTTPOFF:  RefKind = MCSymbolRefExpr::VK_GOTTPOFF; break;
   case X86II::MO_INDNTPOFF: RefKind = MCSymbolRefExpr::VK_INDNTPOFF; break;
diff --git a/lib/Target/X86/X86COFF.h b/lib/Target/X86/X86COFF.h
deleted file mode 100644
index 0a8e4e6..0000000
--- a/lib/Target/X86/X86COFF.h
+++ /dev/null
@@ -1,95 +0,0 @@
-//===--- X86COFF.h - Some definitions from COFF documentations ------------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file just defines some symbols found in COFF documentation. They are
-// used to emit function type information for COFF targets (Cygwin/Mingw32).
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef X86COFF_H
-#define X86COFF_H
-
-namespace COFF 
-{
-/// Storage class tells where and what the symbol represents
-enum StorageClass {
-  C_EFCN =   -1,  ///< Physical end of function
-  C_NULL    = 0,  ///< No symbol
-  C_AUTO    = 1,  ///< External definition
-  C_EXT     = 2,  ///< External symbol
-  C_STAT    = 3,  ///< Static
-  C_REG     = 4,  ///< Register variable
-  C_EXTDEF  = 5,  ///< External definition
-  C_LABEL   = 6,  ///< Label
-  C_ULABEL  = 7,  ///< Undefined label
-  C_MOS     = 8,  ///< Member of structure
-  C_ARG     = 9,  ///< Function argument
-  C_STRTAG  = 10, ///< Structure tag
-  C_MOU     = 11, ///< Member of union
-  C_UNTAG   = 12, ///< Union tag
-  C_TPDEF   = 13, ///< Type definition
-  C_USTATIC = 14, ///< Undefined static
-  C_ENTAG   = 15, ///< Enumeration tag
-  C_MOE     = 16, ///< Member of enumeration
-  C_REGPARM = 17, ///< Register parameter
-  C_FIELD   = 18, ///< Bit field
-
-  C_BLOCK  = 100, ///< ".bb" or ".eb" - beginning or end of block
-  C_FCN    = 101, ///< ".bf" or ".ef" - beginning or end of function
-  C_EOS    = 102, ///< End of structure
-  C_FILE   = 103, ///< File name
-  C_LINE   = 104, ///< Line number, reformatted as symbol
-  C_ALIAS  = 105, ///< Duplicate tag
-  C_HIDDEN = 106  ///< External symbol in dmert public lib
-};
-
-/// The type of the symbol. This is made up of a base type and a derived type.
-/// For example, pointer to int is "pointer to T" and "int"
-enum SymbolType {
-  T_NULL   = 0,  ///< No type info
-  T_ARG    = 1,  ///< Void function argument (only used by compiler)
-  T_VOID   = 1,  ///< The same as above. Just named differently in some specs.
-  T_CHAR   = 2,  ///< Character
-  T_SHORT  = 3,  ///< Short integer
-  T_INT    = 4,  ///< Integer
-  T_LONG   = 5,  ///< Long integer
-  T_FLOAT  = 6,  ///< Floating point
-  T_DOUBLE = 7,  ///< Double word
-  T_STRUCT = 8,  ///< Structure
-  T_UNION  = 9,  ///< Union
-  T_ENUM   = 10, ///< Enumeration
-  T_MOE    = 11, ///< Member of enumeration
-  T_UCHAR  = 12, ///< Unsigned character
-  T_USHORT = 13, ///< Unsigned short
-  T_UINT   = 14, ///< Unsigned integer
-  T_ULONG  = 15  ///< Unsigned long
-};
-
-/// Derived type of symbol
-enum SymbolDerivedType {
-  DT_NON = 0, ///< No derived type
-  DT_PTR = 1, ///< Pointer to T
-  DT_FCN = 2, ///< Function returning T
-  DT_ARY = 3  ///< Array of T
-};
-
-/// Masks for extracting parts of type
-enum SymbolTypeMasks {
-  N_BTMASK = 017, ///< Mask for base type
-  N_TMASK  = 060  ///< Mask for derived type
-};
-
-/// Offsets of parts of type
-enum Shifts {
-  N_BTSHFT = 4 ///< Type is formed as (base + derived << N_BTSHIFT)
-};
-
-}
-
-#endif // X86COFF_H
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index cdde24a..ce13707 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -540,7 +540,7 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) {
       StubAM.GVOpFlags = GVFlags;
 
       // Prepare for inserting code in the local-value area.
-      MachineBasicBlock::iterator SaveInsertPt = enterLocalValueArea();
+      SavePoint SaveInsertPt = enterLocalValueArea();
 
       if (TLI.getPointerTy() == MVT::i64) {
         Opc = X86::MOV64rm;
@@ -1279,12 +1279,11 @@ bool X86FastISel::X86SelectTrunc(const Instruction *I) {
     return false;
 
   // First issue a copy to GR16_ABCD or GR32_ABCD.
-  unsigned CopyOpc = (SrcVT == MVT::i16) ? X86::MOV16rr : X86::MOV32rr;
   const TargetRegisterClass *CopyRC = (SrcVT == MVT::i16)
     ? X86::GR16_ABCDRegisterClass : X86::GR32_ABCDRegisterClass;
   unsigned CopyReg = createResultReg(CopyRC);
-  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CopyOpc), CopyReg)
-    .addReg(InputReg);
+  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+          CopyReg).addReg(InputReg);
 
   // Then issue an extract_subreg.
   unsigned ResultReg = FastEmitInst_extractsubreg(MVT::i8,
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 1a63474..b3c4886 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -2458,17 +2458,23 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
     // If the tailcall address may be in a register, then make sure it's
     // possible to register allocate for it. In 32-bit, the call address can
     // only target EAX, EDX, or ECX since the tail call must be scheduled after
-    // callee-saved registers are restored. In 64-bit, it's RAX, RCX, RDX, RSI,
-    // RDI, R8, R9, R11.
-    if (!isa<GlobalAddressSDNode>(Callee) &&
+    // callee-saved registers are restored. These happen to be the same
+    // registers used to pass 'inreg' arguments so watch out for those.
+    if (!Subtarget->is64Bit() &&
+        !isa<GlobalAddressSDNode>(Callee) &&
         !isa<ExternalSymbolSDNode>(Callee)) {
-      unsigned Limit = Subtarget->is64Bit() ? 8 : 3;
       unsigned NumInRegs = 0;
       for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
         CCValAssign &VA = ArgLocs[i];
-        if (VA.isRegLoc()) {
-          if (++NumInRegs == Limit)
+        if (!VA.isRegLoc())
+          continue;
+        unsigned Reg = VA.getLocReg();
+        switch (Reg) {
+        default: break;
+        case X86::EAX: case X86::EDX: case X86::ECX:
+          if (++NumInRegs == 3)
             return false;
+          break;
         }
       }
     }
@@ -7993,7 +7999,6 @@ X86TargetLowering::EmitAtomicBitwiseWithCustomInserter(MachineInstr *bInstr,
                                                        unsigned immOpc,
                                                        unsigned LoadOpc,
                                                        unsigned CXchgOpc,
-                                                       unsigned copyOpc,
                                                        unsigned notOpc,
                                                        unsigned EAXreg,
                                                        TargetRegisterClass *RC,
@@ -8070,7 +8075,7 @@ X86TargetLowering::EmitAtomicBitwiseWithCustomInserter(MachineInstr *bInstr,
   MIB.addReg(tt);
   (*MIB).addOperand(*argOpers[valArgIndx]);
 
-  MIB = BuildMI(newMBB, dl, TII->get(copyOpc), EAXreg);
+  MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), EAXreg);
   MIB.addReg(t1);
 
   MIB = BuildMI(newMBB, dl, TII->get(CXchgOpc));
@@ -8081,7 +8086,7 @@ X86TargetLowering::EmitAtomicBitwiseWithCustomInserter(MachineInstr *bInstr,
   (*MIB).setMemRefs(bInstr->memoperands_begin(),
                     bInstr->memoperands_end());
 
-  MIB = BuildMI(newMBB, dl, TII->get(copyOpc), destOper.getReg());
+  MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), destOper.getReg());
   MIB.addReg(EAXreg);
 
   // insert branch
@@ -8117,7 +8122,6 @@ X86TargetLowering::EmitAtomicBit6432WithCustomInserter(MachineInstr *bInstr,
 
   const TargetRegisterClass *RC = X86::GR32RegisterClass;
   const unsigned LoadOpc = X86::MOV32rm;
-  const unsigned copyOpc = X86::MOV32rr;
   const unsigned NotOpc = X86::NOT32r;
   const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
   const BasicBlock *LLVM_BB = MBB->getBasicBlock();
@@ -8227,14 +8231,14 @@ X86TargetLowering::EmitAtomicBit6432WithCustomInserter(MachineInstr *bInstr,
     MIB.addReg(t2);
   (*MIB).addOperand(*argOpers[valArgIndx + 1]);
 
-  MIB = BuildMI(newMBB, dl, TII->get(copyOpc), X86::EAX);
+  MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), X86::EAX);
   MIB.addReg(t1);
-  MIB = BuildMI(newMBB, dl, TII->get(copyOpc), X86::EDX);
+  MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), X86::EDX);
   MIB.addReg(t2);
 
-  MIB = BuildMI(newMBB, dl, TII->get(copyOpc), X86::EBX);
+  MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), X86::EBX);
   MIB.addReg(t5);
-  MIB = BuildMI(newMBB, dl, TII->get(copyOpc), X86::ECX);
+  MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), X86::ECX);
   MIB.addReg(t6);
 
   MIB = BuildMI(newMBB, dl, TII->get(X86::LCMPXCHG8B));
@@ -8245,9 +8249,9 @@ X86TargetLowering::EmitAtomicBit6432WithCustomInserter(MachineInstr *bInstr,
   (*MIB).setMemRefs(bInstr->memoperands_begin(),
                     bInstr->memoperands_end());
 
-  MIB = BuildMI(newMBB, dl, TII->get(copyOpc), t3);
+  MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), t3);
   MIB.addReg(X86::EAX);
-  MIB = BuildMI(newMBB, dl, TII->get(copyOpc), t4);
+  MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), t4);
   MIB.addReg(X86::EDX);
 
   // insert branch
@@ -8326,12 +8330,12 @@ X86TargetLowering::EmitAtomicMinMaxWithCustomInserter(MachineInstr *mInstr,
 
   unsigned t2 = F->getRegInfo().createVirtualRegister(X86::GR32RegisterClass);
   if (argOpers[valArgIndx]->isReg())
-    MIB = BuildMI(newMBB, dl, TII->get(X86::MOV32rr), t2);
+    MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), t2);
   else
     MIB = BuildMI(newMBB, dl, TII->get(X86::MOV32rr), t2);
   (*MIB).addOperand(*argOpers[valArgIndx]);
 
-  MIB = BuildMI(newMBB, dl, TII->get(X86::MOV32rr), X86::EAX);
+  MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), X86::EAX);
   MIB.addReg(t1);
 
   MIB = BuildMI(newMBB, dl, TII->get(X86::CMP32rr));
@@ -8353,7 +8357,7 @@ X86TargetLowering::EmitAtomicMinMaxWithCustomInserter(MachineInstr *mInstr,
   (*MIB).setMemRefs(mInstr->memoperands_begin(),
                     mInstr->memoperands_end());
 
-  MIB = BuildMI(newMBB, dl, TII->get(X86::MOV32rr), destOper.getReg());
+  MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), destOper.getReg());
   MIB.addReg(X86::EAX);
 
   // insert branch
@@ -8735,25 +8739,25 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
   case X86::ATOMAND32:
     return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND32rr,
                                                X86::AND32ri, X86::MOV32rm,
-                                               X86::LCMPXCHG32, X86::MOV32rr,
+                                               X86::LCMPXCHG32,
                                                X86::NOT32r, X86::EAX,
                                                X86::GR32RegisterClass);
   case X86::ATOMOR32:
     return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::OR32rr,
                                                X86::OR32ri, X86::MOV32rm,
-                                               X86::LCMPXCHG32, X86::MOV32rr,
+                                               X86::LCMPXCHG32,
                                                X86::NOT32r, X86::EAX,
                                                X86::GR32RegisterClass);
   case X86::ATOMXOR32:
     return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::XOR32rr,
                                                X86::XOR32ri, X86::MOV32rm,
-                                               X86::LCMPXCHG32, X86::MOV32rr,
+                                               X86::LCMPXCHG32,
                                                X86::NOT32r, X86::EAX,
                                                X86::GR32RegisterClass);
   case X86::ATOMNAND32:
     return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND32rr,
                                                X86::AND32ri, X86::MOV32rm,
-                                               X86::LCMPXCHG32, X86::MOV32rr,
+                                               X86::LCMPXCHG32,
                                                X86::NOT32r, X86::EAX,
                                                X86::GR32RegisterClass, true);
   case X86::ATOMMIN32:
@@ -8768,25 +8772,25 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
   case X86::ATOMAND16:
     return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND16rr,
                                                X86::AND16ri, X86::MOV16rm,
-                                               X86::LCMPXCHG16, X86::MOV16rr,
+                                               X86::LCMPXCHG16,
                                                X86::NOT16r, X86::AX,
                                                X86::GR16RegisterClass);
   case X86::ATOMOR16:
     return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::OR16rr,
                                                X86::OR16ri, X86::MOV16rm,
-                                               X86::LCMPXCHG16, X86::MOV16rr,
+                                               X86::LCMPXCHG16,
                                                X86::NOT16r, X86::AX,
                                                X86::GR16RegisterClass);
   case X86::ATOMXOR16:
     return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::XOR16rr,
                                                X86::XOR16ri, X86::MOV16rm,
-                                               X86::LCMPXCHG16, X86::MOV16rr,
+                                               X86::LCMPXCHG16,
                                                X86::NOT16r, X86::AX,
                                                X86::GR16RegisterClass);
   case X86::ATOMNAND16:
     return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND16rr,
                                                X86::AND16ri, X86::MOV16rm,
-                                               X86::LCMPXCHG16, X86::MOV16rr,
+                                               X86::LCMPXCHG16,
                                                X86::NOT16r, X86::AX,
                                                X86::GR16RegisterClass, true);
   case X86::ATOMMIN16:
@@ -8801,25 +8805,25 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
   case X86::ATOMAND8:
     return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND8rr,
                                                X86::AND8ri, X86::MOV8rm,
-                                               X86::LCMPXCHG8, X86::MOV8rr,
+                                               X86::LCMPXCHG8,
                                                X86::NOT8r, X86::AL,
                                                X86::GR8RegisterClass);
   case X86::ATOMOR8:
     return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::OR8rr,
                                                X86::OR8ri, X86::MOV8rm,
-                                               X86::LCMPXCHG8, X86::MOV8rr,
+                                               X86::LCMPXCHG8,
                                                X86::NOT8r, X86::AL,
                                                X86::GR8RegisterClass);
   case X86::ATOMXOR8:
     return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::XOR8rr,
                                                X86::XOR8ri, X86::MOV8rm,
-                                               X86::LCMPXCHG8, X86::MOV8rr,
+                                               X86::LCMPXCHG8,
                                                X86::NOT8r, X86::AL,
                                                X86::GR8RegisterClass);
   case X86::ATOMNAND8:
     return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND8rr,
                                                X86::AND8ri, X86::MOV8rm,
-                                               X86::LCMPXCHG8, X86::MOV8rr,
+                                               X86::LCMPXCHG8,
                                                X86::NOT8r, X86::AL,
                                                X86::GR8RegisterClass, true);
   // FIXME: There are no CMOV8 instructions; MIN/MAX need some other way.
@@ -8827,25 +8831,25 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
   case X86::ATOMAND64:
     return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND64rr,
                                                X86::AND64ri32, X86::MOV64rm,
-                                               X86::LCMPXCHG64, X86::MOV64rr,
+                                               X86::LCMPXCHG64,
                                                X86::NOT64r, X86::RAX,
                                                X86::GR64RegisterClass);
   case X86::ATOMOR64:
     return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::OR64rr,
                                                X86::OR64ri32, X86::MOV64rm,
-                                               X86::LCMPXCHG64, X86::MOV64rr,
+                                               X86::LCMPXCHG64,
                                                X86::NOT64r, X86::RAX,
                                                X86::GR64RegisterClass);
   case X86::ATOMXOR64:
     return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::XOR64rr,
                                                X86::XOR64ri32, X86::MOV64rm,
-                                               X86::LCMPXCHG64, X86::MOV64rr,
+                                               X86::LCMPXCHG64,
                                                X86::NOT64r, X86::RAX,
                                                X86::GR64RegisterClass);
   case X86::ATOMNAND64:
     return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND64rr,
                                                X86::AND64ri32, X86::MOV64rm,
-                                               X86::LCMPXCHG64, X86::MOV64rr,
+                                               X86::LCMPXCHG64,
                                                X86::NOT64r, X86::RAX,
                                                X86::GR64RegisterClass, true);
   case X86::ATOMMIN64:
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index 2d28e5c..4e4daa4 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -764,7 +764,6 @@ namespace llvm {
                                                     unsigned immOpc,
                                                     unsigned loadOpc,
                                                     unsigned cxchgOpc,
-                                                    unsigned copyOpc,
                                                     unsigned notOpc,
                                                     unsigned EAXreg,
                                                     TargetRegisterClass *RC,
diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td
index 97578af..cc3fdf1 100644
--- a/lib/Target/X86/X86InstrFormats.td
+++ b/lib/Target/X86/X86InstrFormats.td
@@ -106,6 +106,7 @@ class VEX    { bit hasVEXPrefix = 1; }
 class VEX_W  { bit hasVEX_WPrefix = 1; }
 class VEX_4V : VEX { bit hasVEX_4VPrefix = 1; }
 class VEX_I8IMM { bit hasVEX_i8ImmReg = 1; }
+class VEX_L  { bit hasVEX_L = 1; }
 
 class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
               string AsmStr, Domain d = GenericDomain>
@@ -138,6 +139,7 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
   bit hasVEX_4VPrefix = 0;  // Does this inst requires the VEX.VVVV field?
   bit hasVEX_i8ImmReg = 0;  // Does this inst requires the last source register
                             // to be encoded in a immediate field?
+  bit hasVEX_L = 0;         // Does this inst use large (256-bit) registers?
 
   // TSFlags layout should be kept in sync with X86InstrInfo.h.
   let TSFlags{5-0}   = FormBits;
@@ -155,6 +157,7 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
   let TSFlags{33}    = hasVEX_WPrefix;
   let TSFlags{34}    = hasVEX_4VPrefix;
   let TSFlags{35}    = hasVEX_i8ImmReg;
+  let TSFlags{36}    = hasVEX_L;
 }
 
 class I<bits<8> o, Format f, dag outs, dag ins, string asm,
diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h
index f762b58..ad0217a 100644
--- a/lib/Target/X86/X86InstrInfo.h
+++ b/lib/Target/X86/X86InstrInfo.h
@@ -453,7 +453,13 @@ namespace X86II {
     // VEX_I8IMM - Specifies that the last register used in a AVX instruction,
     // must be encoded in the i8 immediate field. This usually happens in
     // instructions with 4 operands.
-    VEX_I8IMM   = 1ULL << 35
+    VEX_I8IMM   = 1ULL << 35,
+
+    // VEX_L - Stands for a bit in the VEX opcode prefix meaning the current
+    // instruction uses 256-bit wide registers. This is usually auto-detected if
+    // a VR256 register is used, but some AVX instructions also have this field
+    // marked when using f256 memory references.
+    VEX_L       = 1ULL << 36
   };
   
   // getBaseOpcodeFor - This function returns the "base" X86 opcode for the
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index ab0005b..ebe161b 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -666,6 +666,9 @@ defm VCVTSS2SI : sse12_cvt_s<0x2D, FR32, GR32, undef, f32mem, load,
 defm VCVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, undef, f128mem, load,
                             "cvtdq2ps\t{$src, $dst|$dst, $src}",
                             SSEPackedSingle>, TB, VEX;
+defm VCVTDQ2PSY : sse12_cvt_p<0x5B, VR256, VR256, undef, f256mem, load,
+                            "cvtdq2ps\t{$src, $dst|$dst, $src}",
+                            SSEPackedSingle>, TB, VEX;
 }
 let Pattern = []<dag> in {
 defm CVTSS2SI : sse12_cvt_s<0x2D, FR32, GR32, undef, f32mem, load /*dummy*/,
@@ -806,9 +809,13 @@ def Int_CVTDQ2PDrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
 // Convert packed single/double fp to doubleword
 let isAsmParserOnly = 1 in {
 def VCVTPS2DQrr : VPDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
-                     "cvtps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
+                       "cvtps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
 def VCVTPS2DQrm : VPDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
-                     "cvtps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
+                       "cvtps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTPS2DQYrr : VPDI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
+                        "cvtps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTPS2DQYrm : VPDI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
+                        "cvtps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
 }
 def CVTPS2DQrr : PDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                      "cvtps2dq\t{$src, $dst|$dst, $src}", []>;
@@ -862,6 +869,10 @@ def VCVTTPS2DQrr : VSSI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                       "cvttps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
 def VCVTTPS2DQrm : VSSI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                       "cvttps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTTPS2DQYrr : VSSI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
+                      "cvttps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTTPS2DQYrm : VSSI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
+                      "cvttps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
 }
 def CVTTPS2DQrr : SSI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                       "cvttps2dq\t{$src, $dst|$dst, $src}", []>;
@@ -912,14 +923,39 @@ def Int_CVTTPD2DQrm : PDI<0xE6, MRMSrcMem, (outs VR128:$dst),(ins f128mem:$src),
                           [(set VR128:$dst, (int_x86_sse2_cvttpd2dq
                                              (memop addr:$src)))]>;
 
+let isAsmParserOnly = 1 in {
+// The assembler can recognize rr 256-bit instructions by seeing a ymm
+// register, but the same isn't true when using memory operands instead.
+// Provide other assembly rr and rm forms to address this explicitly.
+def VCVTTPD2DQrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+                        "cvttpd2dq\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTTPD2DQXrYr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
+                          "cvttpd2dq\t{$src, $dst|$dst, $src}", []>, VEX;
+
+// XMM only
+def VCVTTPD2DQXrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+                         "cvttpd2dqx\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTTPD2DQXrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+                         "cvttpd2dqx\t{$src, $dst|$dst, $src}", []>, VEX;
+
+// YMM only
+def VCVTTPD2DQYrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
+                         "cvttpd2dqy\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTTPD2DQYrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
+                         "cvttpd2dqy\t{$src, $dst|$dst, $src}", []>, VEX, VEX_L;
+}
+
 // Convert packed single to packed double
-let isAsmParserOnly = 1 in { // SSE2 instructions without OpSize prefix
+let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+                  // SSE2 instructions without OpSize prefix
 def VCVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
-                       "vcvtps2pd\t{$src, $dst|$dst, $src}", []>, VEX,
-                       Requires<[HasAVX]>;
+                     "vcvtps2pd\t{$src, $dst|$dst, $src}", []>, VEX;
 def VCVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
-                       "vcvtps2pd\t{$src, $dst|$dst, $src}", []>, VEX,
-                       Requires<[HasAVX]>;
+                     "vcvtps2pd\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTPS2PDYrr : I<0x5A, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
+                     "vcvtps2pd\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTPS2PDYrm : I<0x5A, MRMSrcMem, (outs VR256:$dst), (ins f128mem:$src),
+                     "vcvtps2pd\t{$src, $dst|$dst, $src}", []>, VEX;
 }
 def CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                        "cvtps2pd\t{$src, $dst|$dst, $src}", []>, TB;
@@ -949,10 +985,25 @@ def Int_CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
 
 // Convert packed double to packed single
 let isAsmParserOnly = 1 in {
+// The assembler can recognize rr 256-bit instructions by seeing a ymm
+// register, but the same isn't true when using memory operands instead.
+// Provide other assembly rr and rm forms to address this explicitly.
 def VCVTPD2PSrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
-                     "cvtpd2ps\t{$src, $dst|$dst, $src}", []>, VEX;
-// FIXME: the memory form of this instruction should described using
-// use extra asm syntax
+                       "cvtpd2ps\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTPD2PSXrYr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
+                         "cvtpd2ps\t{$src, $dst|$dst, $src}", []>, VEX;
+
+// XMM only
+def VCVTPD2PSXrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+                        "cvtpd2psx\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTPD2PSXrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+                        "cvtpd2psx\t{$src, $dst|$dst, $src}", []>, VEX;
+
+// YMM only
+def VCVTPD2PSYrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
+                        "cvtpd2psy\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTPD2PSYrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
+                        "cvtpd2psy\t{$src, $dst|$dst, $src}", []>, VEX, VEX_L;
 }
 def CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                      "cvtpd2ps\t{$src, $dst|$dst, $src}", []>;
@@ -1142,6 +1193,16 @@ let isAsmParserOnly = 1 in {
                  "cmp${cc}pd\t{$src, $src1, $dst|$dst, $src1, $src}",
                  "cmppd\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}",
                  SSEPackedDouble>, OpSize, VEX_4V;
+  let Pattern = []<dag> in {
+    defm VCMPPSY : sse12_cmp_packed<VR256, f256mem, int_x86_sse_cmp_ps,
+                   "cmp${cc}ps\t{$src, $src1, $dst|$dst, $src1, $src}",
+                   "cmpps\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}",
+                   SSEPackedSingle>, VEX_4V;
+    defm VCMPPDY : sse12_cmp_packed<VR256, f256mem, int_x86_sse2_cmp_pd,
+                   "cmp${cc}pd\t{$src, $src1, $dst|$dst, $src1, $src}",
+                   "cmppd\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}",
+                   SSEPackedDouble>, OpSize, VEX_4V;
+  }
 }
 let Constraints = "$src1 = $dst" in {
   defm CMPPS : sse12_cmp_packed<VR128, f128mem, int_x86_sse_cmp_ps,
@@ -2935,19 +2996,46 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
 // SSE3 - Conversion Instructions
 //===---------------------------------------------------------------------===//
 
+// Convert Packed Double FP to Packed DW Integers
 let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+// The assembler can recognize rr 256-bit instructions by seeing a ymm
+// register, but the same isn't true when using memory operands instead.
+// Provide other assembly rr and rm forms to address this explicitly.
 def VCVTPD2DQrr  : S3DI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                        "vcvtpd2dq\t{$src, $dst|$dst, $src}", []>, VEX;
-def VCVTDQ2PDrm  : S3SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
-                       "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX;
-def VCVTDQ2PDrr  : S3SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
-                       "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTPD2DQXrYr  : S3DI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
+                       "vcvtpd2dq\t{$src, $dst|$dst, $src}", []>, VEX;
+
+// XMM only
+def VCVTPD2DQXrr : S3DI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+                      "vcvtpd2dqx\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTPD2DQXrm : S3DI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+                      "vcvtpd2dqx\t{$src, $dst|$dst, $src}", []>, VEX;
+
+// YMM only
+def VCVTPD2DQYrr : S3DI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
+                      "vcvtpd2dqy\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTPD2DQYrm : S3DI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
+                      "vcvtpd2dqy\t{$src, $dst|$dst, $src}", []>, VEX, VEX_L;
 }
 
 def CVTPD2DQrm  : S3DI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                        "cvtpd2dq\t{$src, $dst|$dst, $src}", []>;
 def CVTPD2DQrr  : S3DI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                        "cvtpd2dq\t{$src, $dst|$dst, $src}", []>;
+
+// Convert Packed DW Integers to Packed Double FP
+let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+def VCVTDQ2PDrm  : S3SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+                       "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTDQ2PDrr  : S3SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+                       "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTDQ2PDYrm  : S3SI<0xE6, MRMSrcMem, (outs VR256:$dst), (ins f128mem:$src),
+                       "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTDQ2PDYrr  : S3SI<0xE6, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
+                       "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX;
+}
+
 def CVTDQ2PDrm  : S3SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                        "cvtdq2pd\t{$src, $dst|$dst, $src}", []>;
 def CVTDQ2PDrr  : S3SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
diff --git a/lib/Target/X86/X86MCCodeEmitter.cpp b/lib/Target/X86/X86MCCodeEmitter.cpp
index 633ddd4..23b0666 100644
--- a/lib/Target/X86/X86MCCodeEmitter.cpp
+++ b/lib/Target/X86/X86MCCodeEmitter.cpp
@@ -432,6 +432,9 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
   if (TSFlags & X86II::VEX_W)
     VEX_W = 1;
 
+  if (TSFlags & X86II::VEX_L)
+    VEX_L = 1;
+
   switch (TSFlags & X86II::Op0Mask) {
   default: assert(0 && "Invalid prefix!");
   case X86II::T8:  // 0F 38
diff --git a/lib/Target/XCore/XCoreMCAsmInfo.cpp b/lib/Target/XCore/XCoreMCAsmInfo.cpp
index 5f6feae..42ab1b3 100644
--- a/lib/Target/XCore/XCoreMCAsmInfo.cpp
+++ b/lib/Target/XCore/XCoreMCAsmInfo.cpp
@@ -10,7 +10,7 @@
 #include "XCoreMCAsmInfo.h"
 using namespace llvm;
 
-XCoreMCAsmInfo::XCoreMCAsmInfo(const Target &T, const StringRef &TT) {
+XCoreMCAsmInfo::XCoreMCAsmInfo(const Target &T, StringRef TT) {
   SupportsDebugInformation = true;
   Data16bitsDirective = "\t.short\t";
   Data32bitsDirective = "\t.long\t";
diff --git a/lib/Target/XCore/XCoreMCAsmInfo.h b/lib/Target/XCore/XCoreMCAsmInfo.h
index 01f8e48..8403922 100644
--- a/lib/Target/XCore/XCoreMCAsmInfo.h
+++ b/lib/Target/XCore/XCoreMCAsmInfo.h
@@ -14,14 +14,15 @@
 #ifndef XCORETARGETASMINFO_H
 #define XCORETARGETASMINFO_H
 
+#include "llvm/ADT/StringRef.h"
 #include "llvm/MC/MCAsmInfo.h"
 
 namespace llvm {
   class Target;
-  class StringRef;
+
   class XCoreMCAsmInfo : public MCAsmInfo {
   public:
-    explicit XCoreMCAsmInfo(const Target &T, const StringRef &TT);
+    explicit XCoreMCAsmInfo(const Target &T, StringRef TT);
   };
 
 } // namespace llvm
diff --git a/lib/Transforms/IPO/Inliner.cpp b/lib/Transforms/IPO/Inliner.cpp
index 027a220..9bb01f5 100644
--- a/lib/Transforms/IPO/Inliner.cpp
+++ b/lib/Transforms/IPO/Inliner.cpp
@@ -399,7 +399,7 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) {
         // We can only inline direct calls to non-declarations.
         if (Callee == 0 || Callee->isDeclaration()) continue;
       
-        // If this call sites was obtained by inlining another function, verify
+        // If this call site was obtained by inlining another function, verify
         // that the include path for the function did not include the callee
         // itself.  If so, we'd be recursively inlinling the same function,
         // which would provide the same callsites, which would cause us to
diff --git a/lib/Transforms/IPO/MergeFunctions.cpp b/lib/Transforms/IPO/MergeFunctions.cpp
index 55d5e2a..aeeafe7 100644
--- a/lib/Transforms/IPO/MergeFunctions.cpp
+++ b/lib/Transforms/IPO/MergeFunctions.cpp
@@ -603,6 +603,10 @@ static void ThunkGToF(Function *F, Function *G) {
 }
 
 static void AliasGToF(Function *F, Function *G) {
+  // Darwin will trigger llvm_unreachable if asked to codegen an alias.
+  return ThunkGToF(F, G);
+
+#if 0
   if (!G->hasExternalLinkage() && !G->hasLocalLinkage() && !G->hasWeakLinkage())
     return ThunkGToF(F, G);
 
@@ -614,6 +618,7 @@ static void AliasGToF(Function *F, Function *G) {
   GA->setVisibility(G->getVisibility());
   G->replaceAllUsesWith(GA);
   G->eraseFromParent();
+#endif
 }
 
 static bool fold(std::vector<Function *> &FnVec, unsigned i, unsigned j) {
diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 3f4a857..5876f40 100644
--- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -472,6 +472,25 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
       Value *NewOr = Builder->CreateOr(Val, Val2);
       return Builder->CreateICmp(LHSCC, NewOr, LHSCst);
     }
+    
+    // (icmp ne (A & C1), 0) & (icmp ne (A & C2), 0) -->
+    // (icmp eq (A & (C1|C2)), (C1|C2)), valid only for single-bit C1 and C2.
+    if (LHSCC == ICmpInst::ICMP_NE && LHSCst->isZero()) {
+      Instruction *I1 = dyn_cast<Instruction>(Val);
+      Instruction *I2 = dyn_cast<Instruction>(Val2);
+      if (I1 && I1->getOpcode() == Instruction::And &&
+          I2 && I2->getOpcode() == Instruction::And &&
+          I1->getOperand(0) == I2->getOperand(0)) {  // Same A in both.
+        ConstantInt *CI1 = dyn_cast<ConstantInt>(I1->getOperand(1));
+        ConstantInt *CI2 = dyn_cast<ConstantInt>(I2->getOperand(1));
+        if (CI1 && CI1->getValue().isPowerOf2() &&
+            CI2 && CI2->getValue().isPowerOf2()) {
+          Constant *ConstOr = ConstantExpr::getOr(CI1, CI2);
+          Value *NewAnd = Builder->CreateAnd(I1->getOperand(0), ConstOr);
+          return Builder->CreateICmp(ICmpInst::ICMP_EQ, NewAnd, ConstOr);
+        }
+      }
+    }
   }
   
   // From here on, we only handle:
diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp
index c44fe9d..f9ffdb1 100644
--- a/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -699,6 +699,34 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
     SI.setOperand(2, TrueVal);
     return &SI;
   }
+  
+  // select (A == 0 | B == 0), T, F --> select (A != 0 & B != 0), F, T
+  // Note: This is a canonicalization rather than an optimization, and is used
+  // to expose opportunities to other instcombine transforms.
+  Instruction* CondInst = dyn_cast<Instruction>(CondVal);
+  if (CondInst && CondInst->hasOneUse() && 
+      CondInst->getOpcode() == Instruction::Or) {
+    ICmpInst *LHSCmp = dyn_cast<ICmpInst>(CondInst->getOperand(0));
+    ICmpInst *RHSCmp = dyn_cast<ICmpInst>(CondInst->getOperand(1));
+    if (LHSCmp && LHSCmp->hasOneUse() &&
+                  LHSCmp->getPredicate() == ICmpInst::ICMP_EQ &&
+        RHSCmp && RHSCmp->hasOneUse() &&
+                  RHSCmp->getPredicate() == ICmpInst::ICMP_EQ) {
+      ConstantInt* C1 = dyn_cast<ConstantInt>(LHSCmp->getOperand(1));
+      ConstantInt* C2 = dyn_cast<ConstantInt>(RHSCmp->getOperand(1));
+      if (C1 && C1->isZero() && C2 && C2->isZero()) {
+        LHSCmp->setPredicate(ICmpInst::ICMP_NE);
+        RHSCmp->setPredicate(ICmpInst::ICMP_NE);
+        Value *And =
+          InsertNewInstBefore(BinaryOperator::CreateAnd(LHSCmp, RHSCmp,
+                                             "and."+CondVal->getName()), SI);
+        SI.setOperand(0, And);
+        SI.setOperand(1, FalseVal);
+        SI.setOperand(2, TrueVal);
+        return &SI;
+      }
+    }
+  }
 
   return 0;
 }
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index a250a88..1f9b415 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -2362,7 +2362,7 @@ void LSRInstance::GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx,
                                           Formula Base) {
   // TODO: For now, just add the min and max offset, because it usually isn't
   // worthwhile looking at everything inbetween.
-  SmallVector<int64_t, 4> Worklist;
+  SmallVector<int64_t, 2> Worklist;
   Worklist.push_back(LU.MinOffset);
   if (LU.MaxOffset != LU.MinOffset)
     Worklist.push_back(LU.MaxOffset);
@@ -2376,7 +2376,14 @@ void LSRInstance::GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx,
       F.AM.BaseOffs = (uint64_t)Base.AM.BaseOffs - *I;
       if (isLegalUse(F.AM, LU.MinOffset - *I, LU.MaxOffset - *I,
                      LU.Kind, LU.AccessTy, TLI)) {
-        F.BaseRegs[i] = SE.getAddExpr(G, SE.getConstant(G->getType(), *I));
+        // Add the offset to the base register.
+        const SCEV *NewG = SE.getAddExpr(G, SE.getConstant(G->getType(), *I));
+        // If it cancelled out, drop the base register, otherwise update it.
+        if (NewG->isZero()) {
+          std::swap(F.BaseRegs[i], F.BaseRegs.back());
+          F.BaseRegs.pop_back();
+        } else
+          F.BaseRegs[i] = NewG;
 
         (void)InsertFormula(LU, LUIdx, F);
       }
diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp
index 0b48a8f..8e91138 100644
--- a/lib/Transforms/Utils/Local.cpp
+++ b/lib/Transforms/Utils/Local.cpp
@@ -306,7 +306,7 @@ bool llvm::SimplifyInstructionsInBlock(BasicBlock *BB, const TargetData *TD) {
       WeakVH BIHandle(BI);
       ReplaceAndSimplifyAllUses(Inst, V, TD);
       MadeChange = true;
-      if (BIHandle == 0)
+      if (BIHandle != BI)
         BI = BB->begin();
       continue;
     }
@@ -354,12 +354,13 @@ void llvm::RemovePredecessorAndSimplify(BasicBlock *BB, BasicBlock *Pred,
     // value into all of its uses.
     assert(PNV != PN && "hasConstantValue broken");
     
+    Value *OldPhiIt = PhiIt;
     ReplaceAndSimplifyAllUses(PN, PNV, TD);
     
     // If recursive simplification ended up deleting the next PHI node we would
     // iterate to, then our iterator is invalid, restart scanning from the top
     // of the block.
-    if (PhiIt == 0) PhiIt = &BB->front();
+    if (PhiIt != OldPhiIt) PhiIt = &BB->front();
   }
 }
 
diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index fd3ed3e..27b07d9 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -1377,8 +1377,9 @@ static bool SimplifyCondBranchToTwoReturns(BranchInst *BI) {
 bool llvm::FoldBranchToCommonDest(BranchInst *BI) {
   BasicBlock *BB = BI->getParent();
   Instruction *Cond = dyn_cast<Instruction>(BI->getCondition());
-  if (Cond == 0) return false;
-
+  if (Cond == 0 || (!isa<CmpInst>(Cond) && !isa<BinaryOperator>(Cond)) ||
+      Cond->getParent() != BB || !Cond->hasOneUse())
+    return false;
   
   // Only allow this if the condition is a simple instruction that can be
   // executed unconditionally.  It must be in the same block as the branch, and
@@ -1387,11 +1388,23 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) {
   // Ignore dbg intrinsics.
   while(isa<DbgInfoIntrinsic>(FrontIt))
     ++FrontIt;
-  if ((!isa<CmpInst>(Cond) && !isa<BinaryOperator>(Cond)) ||
-      Cond->getParent() != BB || &*FrontIt != Cond || !Cond->hasOneUse()) {
-    return false;
+    
+  // Allow a single instruction to be hoisted in addition to the compare
+  // that feeds the branch.  We later ensure that any values that _it_ uses
+  // were also live in the predecessor, so that we don't unnecessarily create
+  // register pressure or inhibit out-of-order execution.
+  Instruction *BonusInst = 0;
+  if (&*FrontIt != Cond &&
+      FrontIt->hasOneUse() && *FrontIt->use_begin() == Cond &&
+      FrontIt->isSafeToSpeculativelyExecute()) {
+    BonusInst = &*FrontIt;
+    ++FrontIt;
   }
   
+  // Only a single bonus inst is allowed.
+  if (&*FrontIt != Cond)
+    return false;
+  
   // Make sure the instruction after the condition is the cond branch.
   BasicBlock::iterator CondIt = Cond; ++CondIt;
   // Ingore dbg intrinsics.
@@ -1429,6 +1442,44 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) {
         !SafeToMergeTerminators(BI, PBI))
       continue;
     
+    // Ensure that any values used in the bonus instruction are also used
+    // by the terminator of the predecessor.  This means that those values
+    // must already have been resolved, so we won't be inhibiting the 
+    // out-of-order core by speculating them earlier.
+    if (BonusInst) {
+      // Collect the values used by the bonus inst
+      SmallPtrSet<Value*, 4> UsedValues;
+      for (Instruction::op_iterator OI = BonusInst->op_begin(),
+           OE = BonusInst->op_end(); OI != OE; ++OI) {
+        Value* V = *OI;
+        if (!isa<Constant>(V))
+          UsedValues.insert(V);
+      }
+
+      SmallVector<std::pair<Value*, unsigned>, 4> Worklist;
+      Worklist.push_back(std::make_pair(PBI->getOperand(0), 0));
+      
+      // Walk up to four levels back up the use-def chain of the predecessor's
+      // terminator to see if all those values were used.  The choice of four
+      // levels is arbitrary, to provide a compile-time-cost bound.
+      while (!Worklist.empty()) {
+        std::pair<Value*, unsigned> Pair = Worklist.back();
+        Worklist.pop_back();
+        
+        if (Pair.second >= 4) continue;
+        UsedValues.erase(Pair.first);
+        if (UsedValues.empty()) break;
+        
+        if (Instruction* I = dyn_cast<Instruction>(Pair.first)) {
+          for (Instruction::op_iterator OI = I->op_begin(), OE = I->op_end();
+               OI != OE; ++OI)
+            Worklist.push_back(std::make_pair(OI->get(), Pair.second+1));
+        }       
+      }
+      
+      if (!UsedValues.empty()) return false;
+    }
+    
     Instruction::BinaryOps Opc;
     bool InvertPredCond = false;
 
@@ -1457,9 +1508,19 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) {
       PBI->setSuccessor(1, OldTrue);
     }
     
+    // If we have a bonus inst, clone it into the predecessor block.
+    Instruction *NewBonus = 0;
+    if (BonusInst) {
+      NewBonus = BonusInst->clone();
+      PredBlock->getInstList().insert(PBI, NewBonus);
+      NewBonus->takeName(BonusInst);
+      BonusInst->setName(BonusInst->getName()+".old");
+    }
+    
     // Clone Cond into the predecessor basic block, and or/and the
     // two conditions together.
     Instruction *New = Cond->clone();
+    if (BonusInst) New->replaceUsesOfWith(BonusInst, NewBonus);
     PredBlock->getInstList().insert(PBI, New);
     New->takeName(Cond);
     Cond->setName(New->getName()+".old");
diff --git a/lib/VMCore/AsmWriter.cpp b/lib/VMCore/AsmWriter.cpp
index 7a471ef..09b8aa5 100644
--- a/lib/VMCore/AsmWriter.cpp
+++ b/lib/VMCore/AsmWriter.cpp
@@ -90,8 +90,7 @@ enum PrefixType {
 /// PrintLLVMName - Turn the specified name into an 'LLVM name', which is either
 /// prefixed with % (if the string only contains simple characters) or is
 /// surrounded with ""'s (if it has special chars in it).  Print it out.
-static void PrintLLVMName(raw_ostream &OS, const StringRef &Name,
-                          PrefixType Prefix) {
+static void PrintLLVMName(raw_ostream &OS, StringRef Name, PrefixType Prefix) {
   assert(Name.data() && "Cannot get empty name!");
   switch (Prefix) {
   default: llvm_unreachable("Bad prefix!");
@@ -855,8 +854,9 @@ static void WriteOptimizationInfo(raw_ostream &Out, const User *U) {
   }
 }
 
-static void WriteConstantInt(raw_ostream &Out, const Constant *CV,
-                             TypePrinting &TypePrinter, SlotTracker *Machine) {
+static void WriteConstantInternal(raw_ostream &Out, const Constant *CV,
+                                  TypePrinting &TypePrinter,
+                                  SlotTracker *Machine) {
   if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) {
     if (CI->getType()->isIntegerTy(1)) {
       Out << (CI->getZExtValue() ? "true" : "false");
@@ -1147,7 +1147,7 @@ static void WriteAsOperandInternal(raw_ostream &Out, const Value *V,
   const Constant *CV = dyn_cast<Constant>(V);
   if (CV && !isa<GlobalValue>(CV)) {
     assert(TypePrinter && "Constants require TypePrinting!");
-    WriteConstantInt(Out, CV, *TypePrinter, Machine);
+    WriteConstantInternal(Out, CV, *TypePrinter, Machine);
     return;
   }
 
@@ -2128,7 +2128,7 @@ void Value::print(raw_ostream &ROS, AssemblyAnnotationWriter *AAW) const {
   } else if (const MDNode *N = dyn_cast<MDNode>(this)) {
     const Function *F = N->getFunction();
     SlotTracker SlotTable(F);
-    AssemblyWriter W(OS, SlotTable, F ? getModuleFromVal(F) : 0, AAW);
+    AssemblyWriter W(OS, SlotTable, F ? F->getParent() : 0, AAW);
     W.printMDNodeBody(N);
   } else if (const NamedMDNode *N = dyn_cast<NamedMDNode>(this)) {
     SlotTracker SlotTable(N->getParent());
@@ -2138,7 +2138,7 @@ void Value::print(raw_ostream &ROS, AssemblyAnnotationWriter *AAW) const {
     TypePrinting TypePrinter;
     TypePrinter.print(C->getType(), OS);
     OS << ' ';
-    WriteConstantInt(OS, C, TypePrinter, 0);
+    WriteConstantInternal(OS, C, TypePrinter, 0);
   } else if (isa<InlineAsm>(this) || isa<MDString>(this) ||
              isa<Argument>(this)) {
     WriteAsOperand(OS, this, true, 0);
diff --git a/lib/VMCore/Metadata.cpp b/lib/VMCore/Metadata.cpp
index 1d3a058..3100d4a 100644
--- a/lib/VMCore/Metadata.cpp
+++ b/lib/VMCore/Metadata.cpp
@@ -78,7 +78,8 @@ void MDNodeOperand::allUsesReplacedWith(Value *NV) {
 /// getOperandPtr - Helper function to get the MDNodeOperand's coallocated on
 /// the end of the MDNode.
 static MDNodeOperand *getOperandPtr(MDNode *N, unsigned Op) {
-  assert(Op < N->getNumOperands() && "Invalid operand number");
+  // Use <= instead of < to permit a one-past-the-end address.
+  assert(Op <= N->getNumOperands() && "Invalid operand number");
   return reinterpret_cast<MDNodeOperand*>(N+1)+Op;
 }
 
diff --git a/test/CodeGen/ARM/fpcmp-opt.ll b/test/CodeGen/ARM/fpcmp-opt.ll
index 8016033..6875288 100644
--- a/test/CodeGen/ARM/fpcmp-opt.ll
+++ b/test/CodeGen/ARM/fpcmp-opt.ll
@@ -1,16 +1,24 @@
-; RUN: llc < %s -march=arm -mattr=+vfp2 -enable-unsafe-fp-math -enable-finite-only-fp-math | FileCheck %s
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 -mattr=+vfp2 -enable-unsafe-fp-math -enable-finite-only-fp-math | FileCheck -check-prefix=FINITE %s
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 -mattr=+vfp2 -enable-unsafe-fp-math | FileCheck -check-prefix=NAN %s
 ; rdar://7461510
 
 define arm_apcscc i32 @t1(float* %a, float* %b) nounwind {
 entry:
-; CHECK: t1:
-; CHECK-NOT: vldr
-; CHECK: ldr
-; CHECK: ldr
-; CHECK: cmp r0, r1
-; CHECK-NOT: vcmpe.f32
-; CHECK-NOT: vmrs
-; CHECK: beq
+; FINITE: t1:
+; FINITE-NOT: vldr
+; FINITE: ldr
+; FINITE: ldr
+; FINITE: cmp r0, r1
+; FINITE-NOT: vcmpe.f32
+; FINITE-NOT: vmrs
+; FINITE: beq
+
+; NAN: t1:
+; NAN: vldr.32 s0,
+; NAN: vldr.32 s1,
+; NAN: vcmpe.f32 s1, s0
+; NAN: vmrs apsr_nzcv, fpscr
+; NAN: beq
   %0 = load float* %a
   %1 = load float* %b
   %2 = fcmp une float %0, %1
@@ -25,5 +33,50 @@ bb2:
   ret i32 %4
 }
 
+define arm_apcscc i32 @t2(double* %a, double* %b) nounwind {
+entry:
+; FINITE: t2:
+; FINITE-NOT: vldr
+; FINITE: ldrd r0, [r0]
+; FINITE: cmp r0, #0
+; FINITE: cmpeq r1, #0
+; FINITE-NOT: vcmpe.f64
+; FINITE-NOT: vmrs
+; FINITE: bne
+  %0 = load double* %a
+  %1 = fcmp oeq double %0, 0.000000e+00
+  br i1 %1, label %bb1, label %bb2
+
+bb1:
+  %2 = call i32 @bar()
+  ret i32 %2
+
+bb2:
+  %3 = call i32 @foo()
+  ret i32 %3
+}
+
+define arm_apcscc i32 @t3(float* %a, float* %b) nounwind {
+entry:
+; FINITE: t3:
+; FINITE-NOT: vldr
+; FINITE: ldr r0, [r0]
+; FINITE: cmp r0, #0
+; FINITE-NOT: vcmpe.f32
+; FINITE-NOT: vmrs
+; FINITE: bne
+  %0 = load float* %a
+  %1 = fcmp oeq float %0, 0.000000e+00
+  br i1 %1, label %bb1, label %bb2
+
+bb1:
+  %2 = call i32 @bar()
+  ret i32 %2
+
+bb2:
+  %3 = call i32 @foo()
+  ret i32 %3
+}
+
 declare i32 @bar()
 declare i32 @foo()
diff --git a/test/CodeGen/ARM/reg_sequence.ll b/test/CodeGen/ARM/reg_sequence.ll
index 9e365c9..89b6577 100644
--- a/test/CodeGen/ARM/reg_sequence.ll
+++ b/test/CodeGen/ARM/reg_sequence.ll
@@ -239,7 +239,7 @@ define arm_aapcs_vfpcc float @t9(%0* nocapture, %3* nocapture) nounwind {
 ; CHECK:        t9:
 ; CHECK:        vldr.64
 ; CHECK-NOT:    vmov d{{.*}}, d0
-; CHECK:        vmov.i8 d1
+; CHECK:        vmov.i32 d1
 ; CHECK-NEXT:   vstmia r0, {d0, d1}
 ; CHECK-NEXT:   vstmia r0, {d0, d1}
   %3 = bitcast double 0.000000e+00 to <2 x float> ; <<2 x float>> [#uses=2]
diff --git a/test/CodeGen/ARM/sub.ll b/test/CodeGen/ARM/sub.ll
new file mode 100644
index 0000000..81513e2
--- /dev/null
+++ b/test/CodeGen/ARM/sub.ll
@@ -0,0 +1,29 @@
+; RUN: llc -march=arm < %s | FileCheck %s
+
+; 171 = 0x000000ab
+define i64 @f1(i64 %a) {
+; CHECK: f1
+; CHECK: subs r0, r0, #171
+; CHECK: sbc r1, r1, #0
+    %tmp = sub i64 %a, 171
+    ret i64 %tmp
+}
+
+; 66846720 = 0x03fc0000
+define i64 @f2(i64 %a) {
+; CHECK: f2
+; CHECK: subs r0, r0, #255, 14
+; CHECK: sbc r1, r1, #0
+    %tmp = sub i64 %a, 66846720
+    ret i64 %tmp
+}
+
+; 734439407618 = 0x000000ab00000002
+define i64 @f3(i64 %a) {
+; CHECK: f3
+; CHECK: subs r0, r0, #2
+; CHECK: sbc r1, r1, #171
+   %tmp = sub i64 %a, 734439407618
+   ret i64 %tmp
+}
+
diff --git a/test/CodeGen/ARM/vdup.ll b/test/CodeGen/ARM/vdup.ll
index 50e4df9..a545f6c 100644
--- a/test/CodeGen/ARM/vdup.ll
+++ b/test/CodeGen/ARM/vdup.ll
@@ -267,3 +267,15 @@ entry:
   %0 = shufflevector <2 x double> %arg0_int64x1_t, <2 x double> undef, <2 x i32> <i32 0, i32 0>
   ret <2 x double> %0
 }
+
+; Radar 7373643
+;CHECK: redundantVdup:
+;CHECK: vmov.i8
+;CHECK-NOT: vdup.8
+;CHECK: vstr.64
+define void @redundantVdup(<8 x i8>* %ptr) nounwind {
+  %1 = insertelement <8 x i8> undef, i8 -128, i32 0
+  %2 = shufflevector <8 x i8> %1, <8 x i8> undef, <8 x i32> zeroinitializer
+  store <8 x i8> %2, <8 x i8>* %ptr, align 8
+  ret void
+}
diff --git a/test/CodeGen/ARM/vmov.ll b/test/CodeGen/ARM/vmov.ll
index f803018..5e872ab 100644
--- a/test/CodeGen/ARM/vmov.ll
+++ b/test/CodeGen/ARM/vmov.ll
@@ -18,6 +18,18 @@ define <4 x i16> @v_movi16b() nounwind {
 	ret <4 x i16> < i16 4096, i16 4096, i16 4096, i16 4096 >
 }
 
+define <4 x i16> @v_mvni16a() nounwind {
+;CHECK: v_mvni16a:
+;CHECK: vmvn.i16 d0, #0x10
+	ret <4 x i16> < i16 65519, i16 65519, i16 65519, i16 65519 >
+}
+
+define <4 x i16> @v_mvni16b() nounwind {
+;CHECK: v_mvni16b:
+;CHECK: vmvn.i16 d0, #0x1000
+	ret <4 x i16> < i16 61439, i16 61439, i16 61439, i16 61439 >
+}
+
 define <2 x i32> @v_movi32a() nounwind {
 ;CHECK: v_movi32a:
 ;CHECK: vmov.i32 d0, #0x20
@@ -54,6 +66,42 @@ define <2 x i32> @v_movi32f() nounwind {
 	ret <2 x i32> < i32 2162687, i32 2162687 >
 }
 
+define <2 x i32> @v_mvni32a() nounwind {
+;CHECK: v_mvni32a:
+;CHECK: vmvn.i32 d0, #0x20
+	ret <2 x i32> < i32 4294967263, i32 4294967263 >
+}
+
+define <2 x i32> @v_mvni32b() nounwind {
+;CHECK: v_mvni32b:
+;CHECK: vmvn.i32 d0, #0x2000
+	ret <2 x i32> < i32 4294959103, i32 4294959103 >
+}
+
+define <2 x i32> @v_mvni32c() nounwind {
+;CHECK: v_mvni32c:
+;CHECK: vmvn.i32 d0, #0x200000
+	ret <2 x i32> < i32 4292870143, i32 4292870143 >
+}
+
+define <2 x i32> @v_mvni32d() nounwind {
+;CHECK: v_mvni32d:
+;CHECK: vmvn.i32 d0, #0x20000000
+	ret <2 x i32> < i32 3758096383, i32 3758096383 >
+}
+
+define <2 x i32> @v_mvni32e() nounwind {
+;CHECK: v_mvni32e:
+;CHECK: vmvn.i32 d0, #0x20FF
+	ret <2 x i32> < i32 4294958848, i32 4294958848 >
+}
+
+define <2 x i32> @v_mvni32f() nounwind {
+;CHECK: v_mvni32f:
+;CHECK: vmvn.i32 d0, #0x20FFFF
+	ret <2 x i32> < i32 4292804608, i32 4292804608 >
+}
+
 define <1 x i64> @v_movi64() nounwind {
 ;CHECK: v_movi64:
 ;CHECK: vmov.i64 d0, #0xFF0000FF0000FFFF
diff --git a/test/CodeGen/Thumb2/2009-10-15-ITBlockBranch.ll b/test/CodeGen/Thumb2/2009-10-15-ITBlockBranch.ll
index e3086a3..98a5263 100644
--- a/test/CodeGen/Thumb2/2009-10-15-ITBlockBranch.ll
+++ b/test/CodeGen/Thumb2/2009-10-15-ITBlockBranch.ll
@@ -12,7 +12,7 @@ define weak arm_aapcs_vfpcc i32 @_ZNKSs7compareERKSs(%"struct.std::basic_string<
 ; CHECK: _ZNKSs7compareERKSs:
 ; CHECK:      it  eq
 ; CHECK-NEXT: subeq.w r0, r6, r8
-; CHECK-NEXT: ldmia.w sp, {r4, r5, r6, r8, r9, pc}
+; CHECK-NEXT: ldmia.w sp!, {r4, r5, r6, r8, r9, pc}
 entry:
   %0 = tail call arm_aapcs_vfpcc  i32 @_ZNKSs4sizeEv(%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* %this) ; <i32> [#uses=3]
   %1 = tail call arm_aapcs_vfpcc  i32 @_ZNKSs4sizeEv(%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* %__str) ; <i32> [#uses=3]
diff --git a/test/CodeGen/Thumb2/thumb2-sbc.ll b/test/CodeGen/Thumb2/thumb2-sbc.ll
index ad96291..de6502d 100644
--- a/test/CodeGen/Thumb2/thumb2-sbc.ll
+++ b/test/CodeGen/Thumb2/thumb2-sbc.ll
@@ -1,8 +1,54 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
+; RUN: llc -march=thumb -mattr=+thumb2 < %s | FileCheck %s
 
 define i64 @f1(i64 %a, i64 %b) {
-; CHECK: f1:
+; CHECK: f1
 ; CHECK: subs r0, r0, r2
     %tmp = sub i64 %a, %b
     ret i64 %tmp
 }
+
+; 734439407618 = 0x000000ab00000002
+define i64 @f2(i64 %a) {
+; CHECK: f2
+; CHECK: subs r0, #2
+; CHECK: sbc r1, r1, #171
+    %tmp = sub i64 %a, 734439407618
+    ret i64 %tmp
+}
+
+; 5066626890203138 = 0x0012001200000002
+define i64 @f3(i64 %a) {
+; CHECK: f3
+; CHECK: subs  r0, #2
+; CHECK: sbc r1, r1, #1179666
+    %tmp = sub i64 %a, 5066626890203138
+    ret i64 %tmp
+}
+
+; 3747052064576897026 = 0x3400340000000002
+define i64 @f4(i64 %a) {
+; CHECK: f4
+; CHECK: subs  r0, #2
+; CHECK: sbc r1, r1, #872428544
+    %tmp = sub i64 %a, 3747052064576897026
+    ret i64 %tmp
+}
+
+; 6221254862626095106 = 0x5656565600000002
+define i64 @f5(i64 %a) {
+; CHECK: f5
+; CHECK: subs  r0, #2
+; CHECK: adc r1, r1, #-1448498775
+    %tmp = sub i64 %a, 6221254862626095106 
+    ret i64 %tmp
+}
+
+; 287104476244869122 = 0x03fc000000000002
+define i64 @f6(i64 %a) {
+; CHECK: f6
+; CHECK: subs  r0, #2
+; CHECK: sbc r1, r1, #66846720
+    %tmp = sub i64 %a, 287104476244869122 
+    ret i64 %tmp
+}
+
diff --git a/test/CodeGen/Thumb2/thumb2-sub3.ll b/test/CodeGen/Thumb2/thumb2-sub3.ll
new file mode 100644
index 0000000..855ad06
--- /dev/null
+++ b/test/CodeGen/Thumb2/thumb2-sub3.ll
@@ -0,0 +1,55 @@
+; RUN: llc -march=thumb -mattr=+thumb2 < %s | FileCheck %s
+
+; 171 = 0x000000ab
+define i64 @f1(i64 %a) {
+; CHECK: f1
+; CHECK: subs  r0, #171
+; CHECK: adc r1, r1, #-1
+    %tmp = sub i64 %a, 171
+    ret i64 %tmp
+}
+
+; 1179666 = 0x00120012
+define i64 @f2(i64 %a) {
+; CHECK: f2
+; CHECK: subs.w  r0, r0, #1179666
+; CHECK: adc r1, r1, #-1
+    %tmp = sub i64 %a, 1179666
+    ret i64 %tmp
+}
+
+; 872428544 = 0x34003400
+define i64 @f3(i64 %a) {
+; CHECK: f3
+; CHECK: subs.w  r0, r0, #872428544
+; CHECK: adc r1, r1, #-1
+    %tmp = sub i64 %a, 872428544
+    ret i64 %tmp
+}
+
+; 1448498774 = 0x56565656
+define i64 @f4(i64 %a) {
+; CHECK: f4
+; CHECK: subs.w  r0, r0, #1448498774
+; CHECK: adc r1, r1, #-1
+    %tmp = sub i64 %a, 1448498774
+    ret i64 %tmp
+}
+
+; 66846720 = 0x03fc0000
+define i64 @f5(i64 %a) {
+; CHECK: f5
+; CHECK: subs.w  r0, r0, #66846720
+; CHECK: adc r1, r1, #-1
+    %tmp = sub i64 %a, 66846720
+    ret i64 %tmp
+}
+
+; 734439407618 = 0x000000ab00000002
+define i64 @f6(i64 %a) {
+; CHECK: f6
+; CHECK: subs r0, #2
+; CHECK: sbc r1, r1, #171
+   %tmp = sub i64 %a, 734439407618
+   ret i64 %tmp
+}
diff --git a/test/CodeGen/X86/2006-05-22-FPSetEQ.ll b/test/CodeGen/X86/2006-05-22-FPSetEQ.ll
index 083d068..2d7bd27 100644
--- a/test/CodeGen/X86/2006-05-22-FPSetEQ.ll
+++ b/test/CodeGen/X86/2006-05-22-FPSetEQ.ll
@@ -1,5 +1,5 @@
 ; RUN: llc < %s -march=x86 | grep setnp
-; RUN: llc < %s -march=x86 -enable-unsafe-fp-math | \
+; RUN: llc < %s -march=x86 -enable-unsafe-fp-math -enable-finite-only-fp-math | \
 ; RUN:   not grep setnp
 
 define i32 @test(float %f) {
diff --git a/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll b/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll
index d9655fd..4a97ac3 100644
--- a/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll
+++ b/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=x86-64 -mattr=+sse3 -stats |& grep {2 machine-licm}
-; RUN: llc < %s -march=x86-64 -mattr=+sse3 | FileCheck %s
+; RUN: llc < %s -march=x86-64 -mattr=+sse3,+sse41 -stats |& grep {7 machine-licm}
+; RUN: llc < %s -march=x86-64 -mattr=+sse3,+sse41 | FileCheck %s
 ; rdar://6627786
 ; rdar://7792037
 
diff --git a/test/CodeGen/X86/2010-06-28-DbgEntryPC.ll b/test/CodeGen/X86/2010-06-28-DbgEntryPC.ll
new file mode 100644
index 0000000..9b9d636
--- /dev/null
+++ b/test/CodeGen/X86/2010-06-28-DbgEntryPC.ll
@@ -0,0 +1,108 @@
+; RUN: llc -O2 -mtriple=i386-apple-darwin <%s | FileCheck %s
+; Use DW_FORM_addr for DW_AT_entry_pc.
+; Radar 8094785
+
+; CHECK:	.byte	17                      ## DW_TAG_compile_unit
+; CHECK-NEXT:	.byte	1                       ## DW_CHILDREN_yes
+; CHECK-NEXT:	.byte	37                      ## DW_AT_producer
+; CHECK-NEXT:	.byte	8                       ## DW_FORM_string
+; CHECK-NEXT:	.byte	19                      ## DW_AT_language
+; CHECK-NEXT:	.byte	11                      ## DW_FORM_data1
+; CHECK-NEXT:	.byte	3                       ## DW_AT_name
+; CHECK-NEXT:	.byte	8                       ## DW_FORM_string
+; CHECK-NEXT:	.byte	82                      ## DW_AT_entry_pc
+; CHECK-NEXT:	.byte	1                       ## DW_FORM_addr
+; CHECK-NEXT:	.byte	16                      ## DW_AT_stmt_list
+; CHECK-NEXT:	.byte	6                       ## DW_FORM_data4
+; CHECK-NEXT:	.byte	27                      ## DW_AT_comp_dir
+; CHECK-NEXT:	.byte	8                       ## DW_FORM_string
+; CHECK-NEXT:	.byte	225                     ## DW_AT_APPLE_optimized
+
+%struct.a = type { i32, %struct.a* }
+
+@ret = common global i32 0                        ; <i32*> [#uses=2]
+
+define void @foo(i32 %x) nounwind noinline ssp {
+entry:
+  tail call void @llvm.dbg.value(metadata !{i32 %x}, i64 0, metadata !21), !dbg !28
+  store i32 %x, i32* @ret, align 4, !dbg !29
+  ret void, !dbg !31
+}
+
+declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+
+declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+
+define i8* @bar(%struct.a* %b) nounwind noinline ssp {
+entry:
+  tail call void @llvm.dbg.value(metadata !{%struct.a* %b}, i64 0, metadata !22), !dbg !32
+  %0 = getelementptr inbounds %struct.a* %b, i64 0, i32 0, !dbg !33 ; <i32*> [#uses=1]
+  %1 = load i32* %0, align 8, !dbg !33            ; <i32> [#uses=1]
+  tail call void @foo(i32 %1) nounwind noinline ssp, !dbg !33
+  %2 = bitcast %struct.a* %b to i8*, !dbg !35     ; <i8*> [#uses=1]
+  ret i8* %2, !dbg !35
+}
+
+define i32 @main(i32 %argc, i8** nocapture %argv) nounwind ssp {
+entry:
+  %e = alloca %struct.a, align 8                  ; <%struct.a*> [#uses=4]
+  call void @llvm.dbg.value(metadata !{i32 %argc}, i64 0, metadata !23), !dbg !36
+  call void @llvm.dbg.value(metadata !{i8** %argv}, i64 0, metadata !24), !dbg !36
+  call void @llvm.dbg.declare(metadata !{%struct.a* %e}, metadata !25), !dbg !37
+  %0 = getelementptr inbounds %struct.a* %e, i64 0, i32 0, !dbg !38 ; <i32*> [#uses=1]
+  store i32 4, i32* %0, align 8, !dbg !38
+  %1 = getelementptr inbounds %struct.a* %e, i64 0, i32 1, !dbg !39 ; <%struct.a**> [#uses=1]
+  store %struct.a* %e, %struct.a** %1, align 8, !dbg !39
+  %2 = call i8* @bar(%struct.a* %e) nounwind noinline ssp, !dbg !40 ; <i8*> [#uses=0]
+  %3 = load i32* @ret, align 4, !dbg !41          ; <i32> [#uses=1]
+  ret i32 %3, !dbg !41
+}
+
+!llvm.dbg.sp = !{!0, !6, !15}
+!llvm.dbg.lv.foo = !{!21}
+!llvm.dbg.lv.bar = !{!22}
+!llvm.dbg.lv.main = !{!23, !24, !25}
+!llvm.dbg.gv = !{!27}
+
+!0 = metadata !{i32 524334, i32 0, metadata !1, metadata !"foo", metadata !"foo", metadata !"foo", metadata !1, i32 34, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true, void (i32)* @foo} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 524329, metadata !"2010-06-28-DbgEntryPC.c", metadata !"/Users/yash/clean/llvm/test/FrontendC", metadata !2} ; [ DW_TAG_file_type ]
+!2 = metadata !{i32 524305, i32 0, i32 1, metadata !"2010-06-28-DbgEntryPC.c", metadata !"/Users/yash/clean/llvm/test/FrontendC", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 524309, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!4 = metadata !{null, metadata !5}
+!5 = metadata !{i32 524324, metadata !1, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 524334, i32 0, metadata !1, metadata !"bar", metadata !"bar", metadata !"bar", metadata !1, i32 38, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true, i8* (%struct.a*)* @bar} ; [ DW_TAG_subprogram ]
+!7 = metadata !{i32 524309, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!8 = metadata !{metadata !9, metadata !10}
+!9 = metadata !{i32 524303, metadata !1, metadata !"", metadata !1, i32 0, i64 64, i64 64, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ]
+!10 = metadata !{i32 524303, metadata !1, metadata !"", metadata !1, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !11} ; [ DW_TAG_pointer_type ]
+!11 = metadata !{i32 524307, metadata !1, metadata !"a", metadata !1, i32 23, i64 128, i64 64, i64 0, i32 0, null, metadata !12, i32 0, null} ; [ DW_TAG_structure_type ]
+!12 = metadata !{metadata !13, metadata !14}
+!13 = metadata !{i32 524301, metadata !11, metadata !"c", metadata !1, i32 24, i64 32, i64 32, i64 0, i32 0, metadata !5} ; [ DW_TAG_member ]
+!14 = metadata !{i32 524301, metadata !11, metadata !"d", metadata !1, i32 25, i64 64, i64 64, i64 64, i32 0, metadata !10} ; [ DW_TAG_member ]
+!15 = metadata !{i32 524334, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"main", metadata !1, i32 43, metadata !16, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true, i32 (i32, i8**)* @main} ; [ DW_TAG_subprogram ]
+!16 = metadata !{i32 524309, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !17, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!17 = metadata !{metadata !5, metadata !5, metadata !18}
+!18 = metadata !{i32 524303, metadata !1, metadata !"", metadata !1, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !19} ; [ DW_TAG_pointer_type ]
+!19 = metadata !{i32 524303, metadata !1, metadata !"", metadata !1, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !20} ; [ DW_TAG_pointer_type ]
+!20 = metadata !{i32 524324, metadata !1, metadata !"char", metadata !1, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ]
+!21 = metadata !{i32 524545, metadata !0, metadata !"x", metadata !1, i32 33, metadata !5} ; [ DW_TAG_arg_variable ]
+!22 = metadata !{i32 524545, metadata !6, metadata !"b", metadata !1, i32 38, metadata !10} ; [ DW_TAG_arg_variable ]
+!23 = metadata !{i32 524545, metadata !15, metadata !"argc", metadata !1, i32 43, metadata !5} ; [ DW_TAG_arg_variable ]
+!24 = metadata !{i32 524545, metadata !15, metadata !"argv", metadata !1, i32 43, metadata !18} ; [ DW_TAG_arg_variable ]
+!25 = metadata !{i32 524544, metadata !26, metadata !"e", metadata !1, i32 44, metadata !11} ; [ DW_TAG_auto_variable ]
+!26 = metadata !{i32 524299, metadata !15, i32 43, i32 0} ; [ DW_TAG_lexical_block ]
+!27 = metadata !{i32 524340, i32 0, metadata !1, metadata !"ret", metadata !"ret", metadata !"", metadata !1, i32 28, metadata !5, i1 false, i1 true, i32* @ret} ; [ DW_TAG_variable ]
+!28 = metadata !{i32 33, i32 0, metadata !0, null}
+!29 = metadata !{i32 35, i32 0, metadata !30, null}
+!30 = metadata !{i32 524299, metadata !0, i32 34, i32 0} ; [ DW_TAG_lexical_block ]
+!31 = metadata !{i32 36, i32 0, metadata !30, null}
+!32 = metadata !{i32 38, i32 0, metadata !6, null}
+!33 = metadata !{i32 39, i32 0, metadata !34, null}
+!34 = metadata !{i32 524299, metadata !6, i32 38, i32 0} ; [ DW_TAG_lexical_block ]
+!35 = metadata !{i32 40, i32 0, metadata !34, null}
+!36 = metadata !{i32 43, i32 0, metadata !15, null}
+!37 = metadata !{i32 44, i32 0, metadata !26, null}
+!38 = metadata !{i32 45, i32 0, metadata !26, null}
+!39 = metadata !{i32 46, i32 0, metadata !26, null}
+!40 = metadata !{i32 48, i32 0, metadata !26, null}
+!41 = metadata !{i32 49, i32 0, metadata !26, null}
diff --git a/test/CodeGen/X86/2010-07-06-asm-RIP.ll b/test/CodeGen/X86/2010-07-06-asm-RIP.ll
index f646afa..9526b8d 100644
--- a/test/CodeGen/X86/2010-07-06-asm-RIP.ll
+++ b/test/CodeGen/X86/2010-07-06-asm-RIP.ll
@@ -1,5 +1,5 @@
 ; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s
-; PR 7528
+; PR 4752
 
 @n = global i32 0                                 ; <i32*> [#uses=2]
 
diff --git a/test/CodeGen/X86/2010-07-13-indirectXconstraint.ll b/test/CodeGen/X86/2010-07-13-indirectXconstraint.ll
new file mode 100644
index 0000000..97cbe3e
--- /dev/null
+++ b/test/CodeGen/X86/2010-07-13-indirectXconstraint.ll
@@ -0,0 +1,18 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s
+; PR 7528
+; formerly crashed
+
+%0 = type { [12 x i16] }
+%union..0anon = type { [3 x <1 x i64>] }
+
+@gsm_H.1466 = internal constant %0 { [12 x i16] [i16 -134, i16 -374, i16 0, i16 2054, i16 5741, i16 8192, i16 5741, i16 2054, i16 0, i16 -374, i16 -134, i16 0] }, align 8 ; <%0*> [#uses=1]
+
+define void @weighting_filter() nounwind ssp {
+entry:
+; CHECK: leaq _gsm_H.1466(%rip),%rax;
+  call void asm sideeffect "leaq $0,%rax;\0A", "*X,~{dirflag},~{fpsr},~{flags},~{memory},~{rax}"(%union..0anon* bitcast (%0* @gsm_H.1466 to %union..0anon*)) nounwind
+  br label %return
+
+return:                                           ; preds = %entry
+  ret void
+}
diff --git a/test/CodeGen/X86/fabs.ll b/test/CodeGen/X86/fabs.ll
index 54947c3..23b45eb 100644
--- a/test/CodeGen/X86/fabs.ll
+++ b/test/CodeGen/X86/fabs.ll
@@ -1,7 +1,7 @@
 ; Make sure this testcase codegens to the fabs instruction, not a call to fabsf
 ; RUN: llc < %s -march=x86 -mattr=-sse2,-sse3,-sse | grep fabs\$ | \
 ; RUN:   count 2
-; RUN: llc < %s -march=x86 -mattr=-sse,-sse2,-sse3 -enable-unsafe-fp-math | \
+; RUN: llc < %s -march=x86 -mattr=-sse,-sse2,-sse3 -enable-unsafe-fp-math -enable-finite-only-fp-math | \
 ; RUN:   grep fabs\$ | count 3
 
 declare float @fabsf(float)
diff --git a/test/CodeGen/X86/fast-isel-loads.ll b/test/CodeGen/X86/fast-isel-loads.ll
deleted file mode 100644
index 2fbb46c..0000000
--- a/test/CodeGen/X86/fast-isel-loads.ll
+++ /dev/null
@@ -1,23 +0,0 @@
-; RUN: llc -march=x86-64 -O0 -asm-verbose=false < %s | FileCheck %s
-
-; Fast-isel shouldn't reload the argument values from the stack.
-
-; CHECK: foo:
-; CHECK-NEXT: movq  %rdi, -8(%rsp)
-; CHECK-NEXT: movq  %rsi, -16(%rsp)
-; CHECK-NEXT: movsd 128(%rsi,%rdi,8), %xmm0
-; CHECK-NEXT: ret
-
-define double @foo(i64 %x, double* %p) nounwind {
-entry:
-  %x.addr = alloca i64, align 8                   ; <i64*> [#uses=2]
-  %p.addr = alloca double*, align 8               ; <double**> [#uses=2]
-  store i64 %x, i64* %x.addr
-  store double* %p, double** %p.addr
-  %tmp = load i64* %x.addr                        ; <i64> [#uses=1]
-  %tmp1 = load double** %p.addr                   ; <double*> [#uses=1]
-  %add = add nsw i64 %tmp, 16                     ; <i64> [#uses=1]
-  %arrayidx = getelementptr inbounds double* %tmp1, i64 %add ; <double*> [#uses=1]
-  %tmp2 = load double* %arrayidx                  ; <double> [#uses=1]
-  ret double %tmp2
-}
diff --git a/test/CodeGen/X86/lsr-i386.ll b/test/CodeGen/X86/lsr-i386.ll
new file mode 100644
index 0000000..02baf20
--- /dev/null
+++ b/test/CodeGen/X86/lsr-i386.ll
@@ -0,0 +1,44 @@
+; RUN: llc -march=x86 < %s | FileCheck %s
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32"
+target triple = "i386-pc-linux-gnu"
+; PR7651
+
+; CHECK: align
+; CHECK: align
+; CHECK: align
+; CHECK: movl  $0, (%e
+; CHECK-NEXT: addl  $4, %e
+; CHECK-NEXT: decl  %e
+; CHECK-NEXT: jne
+
+%struct.anon = type { [72 x i32], i32 }
+
+@mp2grad_ = external global %struct.anon
+
+define void @chomp2g_setup_(i32 %n, i32 %m) nounwind {
+entry:
+  br label %bb1
+
+bb1:                                              ; preds = %bb6, %entry
+  %indvar11 = phi i32 [ %indvar.next12, %bb6 ], [ 0, %entry ] ; <i32> [#uses=2]
+  %tmp21 = add i32 %indvar11, 1                   ; <i32> [#uses=1]
+  %t = load i32* getelementptr inbounds (%struct.anon* @mp2grad_, i32 0, i32 1) ; <i32> [#uses=1] field 1 of @mp2grad_ (the i32 after the array)
+  %tmp15 = mul i32 %n, %t                      ; <i32> [#uses=1]
+  %tmp16 = add i32 %tmp21, %tmp15                 ; <i32> [#uses=1]
+  %tmp17 = shl i32 %tmp16, 3                      ; <i32> [#uses=1]
+  %tmp18 = add i32 %tmp17, -8                     ; <i32> [#uses=1]
+  br label %bb2
+
+bb2:                                              ; preds = %bb2, %bb1
+  %indvar = phi i32 [ 0, %bb1 ], [ %indvar.next, %bb2 ] ; <i32> [#uses=2]
+  %tmp19 = add i32 %tmp18, %indvar                ; <i32> [#uses=1]
+  %scevgep = getelementptr %struct.anon* @mp2grad_, i32 0, i32 0, i32 %tmp19 ; <i32*> [#uses=1]
+  store i32 0, i32* %scevgep ; zero element %tmp19 of the array in field 0 of @mp2grad_
+  %indvar.next = add i32 %indvar, 1               ; <i32> [#uses=2]
+  %c = icmp ne i32 %indvar.next, %m ; <i1> [#uses=1] inner loop repeats until %indvar.next equals %m
+  br i1 %c, label %bb2, label %bb6
+
+bb6:                                              ; preds = %bb2
+  %indvar.next12 = add i32 %indvar11, 1           ; <i32> [#uses=1]
+  br label %bb1 ; unconditional back-edge: the outer loop has no exit and the function has no ret
+}
diff --git a/test/CodeGen/X86/sibcall-4.ll b/test/CodeGen/X86/sibcall-4.ll
new file mode 100644
index 0000000..1499e668
--- /dev/null
+++ b/test/CodeGen/X86/sibcall-4.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s -mtriple=i386-pc-linux-gnu | FileCheck %s
+; pr7610
+
+define cc10 void @t(i32* %Base_Arg, i32* %Sp_Arg, i32* %Hp_Arg, i32 %R1_Arg) nounwind {
+cm1:
+; CHECK: t:
+; CHECK: jmpl *%eax
+  %nm3 = getelementptr i32* %Sp_Arg, i32 1 ; %Sp_Arg advanced by one i32; passed on as the callee's second argument
+  %nm9 = load i32* %Sp_Arg ; i32 stored at %Sp_Arg, converted to the callee address below
+  %nma = inttoptr i32 %nm9 to void (i32*, i32*, i32*, i32)* ; callee pointer with the same parameter types as @t
+  tail call cc10 void %nma(i32* %Base_Arg, i32* %nm3, i32* %Hp_Arg, i32 %R1_Arg) nounwind ; indirect cc10 tail call; the directives above expect it to be emitted as an indirect jmpl through eax
+  ret void
+}
diff --git a/test/CodeGen/X86/sse-minmax.ll b/test/CodeGen/X86/sse-minmax.ll
index d265bd7..ebcdc65 100644
--- a/test/CodeGen/X86/sse-minmax.ll
+++ b/test/CodeGen/X86/sse-minmax.ll
@@ -1,5 +1,5 @@
 ; RUN: llc < %s -march=x86-64 -asm-verbose=false | FileCheck %s
-; RUN: llc < %s -march=x86-64 -asm-verbose=false -enable-unsafe-fp-math | FileCheck -check-prefix=UNSAFE %s
+; RUN: llc < %s -march=x86-64 -asm-verbose=false -enable-unsafe-fp-math -enable-finite-only-fp-math | FileCheck -check-prefix=UNSAFE %s
 ; RUN: llc < %s -march=x86-64 -asm-verbose=false -enable-finite-only-fp-math | FileCheck -check-prefix=FINITE %s
 
 ; Some of these patterns can be matched as SSE min or max. Some of
diff --git a/test/Feature/metadata.ll b/test/Feature/metadata.ll
index 3e2cd3c..d43815b 100644
--- a/test/Feature/metadata.ll
+++ b/test/Feature/metadata.ll
@@ -9,5 +9,7 @@ define void @foo() {
 declare void @llvm.zonk(metadata, i64, metadata) nounwind readnone
 
 !named = !{!0}
+!another_named = !{}
 !0 = metadata !{i8** null}
-!1 = metadata !{i8* null}
+!1 = metadata !{i8* null, metadata !2}
+!2 = metadata !{}
diff --git a/test/FrontendC/2010-06-28-DbgEntryPC.c b/test/FrontendC/2010-06-28-DbgEntryPC.c
deleted file mode 100644
index 76a6b51..0000000
--- a/test/FrontendC/2010-06-28-DbgEntryPC.c
+++ /dev/null
@@ -1,50 +0,0 @@
-// RUN: %llvmgcc -S -O2 -g %s -o - | llc -O2 -mtriple=i386-apple-darwin | FileCheck %s
-// Use DW_FORM_addr for DW_AT_entry_pc.
-// Radar 8094785
-
-// XFAIL: *
-// XTARGET: x86,i386,i686
-// CHECK:	.byte	17                      ## DW_TAG_compile_unit
-// CHECK-NEXT:	.byte	1                       ## DW_CHILDREN_yes
-// CHECK-NEXT:	.byte	37                      ## DW_AT_producer
-// CHECK-NEXT:	.byte	8                       ## DW_FORM_string
-// CHECK-NEXT:	.byte	19                      ## DW_AT_language
-// CHECK-NEXT:	.byte	11                      ## DW_FORM_data1
-// CHECK-NEXT:	.byte	3                       ## DW_AT_name
-// CHECK-NEXT:	.byte	8                       ## DW_FORM_string
-// CHECK-NEXT:	.byte	82                      ## DW_AT_entry_pc
-// CHECK-NEXT:	.byte	1                       ## DW_FORM_addr
-// CHECK-NEXT:	.byte	16                      ## DW_AT_stmt_list
-// CHECK-NEXT:	.byte	6                       ## DW_FORM_data4
-// CHECK-NEXT:	.byte	27                      ## DW_AT_comp_dir
-// CHECK-NEXT:	.byte	8                       ## DW_FORM_string
-// CHECK-NEXT:	.byte	225                     ## DW_AT_APPLE_optimized
-
-struct a {
-  int c;
-  struct a *d;
-};
-
-int ret;
-
-void foo(int x) __attribute__((noinline));
-void *bar(struct a *b) __attribute__((noinline));
-
-void foo(int x)
-{
-  ret = x;
-}
-
-void *bar(struct a *b) {
-  foo(b->c);
-  return b;
-}
-
-int main(int argc, char *argv[]) {
-  struct a e;
-  e.c = 4;
-  e.d = &e;
-
-  (void)bar(&e);
-  return ret;
-}
diff --git a/test/FrontendC/2010-07-14-overconservative-align.c b/test/FrontendC/2010-07-14-overconservative-align.c
new file mode 100644
index 0000000..65fbdb8
--- /dev/null
+++ b/test/FrontendC/2010-07-14-overconservative-align.c
@@ -0,0 +1,14 @@
+// RUN: %llvmgcc %s -emit-llvm -m64 -S -o - | FileCheck %s
+// PR 5995
+struct s {
+    int word;
+    struct {
+        int filler __attribute__ ((aligned (8)));
+    };
+};
+
+void func (struct s *s)
+{
+// CHECK: load %struct.s** %s_addr, align 8
+    s->word = 0;
+}
diff --git a/test/FrontendC/2010-07-14-ref-off-end.c b/test/FrontendC/2010-07-14-ref-off-end.c
new file mode 100644
index 0000000..6ccd05b
--- /dev/null
+++ b/test/FrontendC/2010-07-14-ref-off-end.c
@@ -0,0 +1,27 @@
+// RUN: %llvmgcc %s -S -m32 -o - | FileCheck %s
+// Formerly this generated code that did a load past the end of the structure.
+// That was fixed by 46726, but that patch had bad side effects and was
+// reverted.  This has been fixed another way in the meantime.
+extern void abort();
+extern void exit(int);
+struct T
+{
+unsigned i:8;
+unsigned c:24;
+};
+f(struct T t)
+{
+struct T s[1];
+s[0]=t;
+return(char)s->c;
+}
+main()
+{
+// CHECK:  getelementptr inbounds %struct.T* %t, i32 0, i32 0 ; <i32*> [#uses=2]
+// CHECK:  getelementptr inbounds %struct.T* %t, i32 0, i32 0 ; <i32*> [#uses=2]
+struct T t;
+t.i=0xff;
+t.c=0xffff11;
+if(f(t)!=0x11)abort();
+exit(0);
+}
diff --git a/test/FrontendC/vla-1.c b/test/FrontendC/vla-1.c
new file mode 100644
index 0000000..76f6c53
--- /dev/null
+++ b/test/FrontendC/vla-1.c
@@ -0,0 +1,8 @@
+// RUN: true
+// %llvmgcc -std=gnu99 %s -S |& grep {error: "is greater than the stack alignment" } 
+
+int foo(int a)
+{
+  int var[a] __attribute__((__aligned__(32)));
+  return 4;
+}
diff --git a/test/MC/AsmParser/X86/x86_32-encoding.s b/test/MC/AsmParser/X86/x86_32-encoding.s
index ea4a36e..ebafb11 100644
--- a/test/MC/AsmParser/X86/x86_32-encoding.s
+++ b/test/MC/AsmParser/X86/x86_32-encoding.s
@@ -12598,3 +12598,331 @@
 // CHECK: encoding: [0xc5,0xed,0x55,0x6c,0xcb,0xfc]
           vandnpd  -4(%ebx,%ecx,8), %ymm2, %ymm5
 
+// CHECK: vcvtps2pd  %xmm3, %ymm2
+// CHECK: encoding: [0xc5,0xfc,0x5a,0xd3]
+          vcvtps2pd  %xmm3, %ymm2
+
+// CHECK: vcvtps2pd  (%eax), %ymm2
+// CHECK: encoding: [0xc5,0xfc,0x5a,0x10]
+          vcvtps2pd  (%eax), %ymm2
+
+// CHECK: vcvtdq2pd  %xmm3, %ymm2
+// CHECK: encoding: [0xc5,0xfe,0xe6,0xd3]
+          vcvtdq2pd  %xmm3, %ymm2
+
+// CHECK: vcvtdq2pd  (%eax), %ymm2
+// CHECK: encoding: [0xc5,0xfe,0xe6,0x10]
+          vcvtdq2pd  (%eax), %ymm2
+
+// CHECK: vcvtdq2ps  %ymm2, %ymm5
+// CHECK: encoding: [0xc5,0xfc,0x5b,0xea]
+          vcvtdq2ps  %ymm2, %ymm5
+
+// CHECK: vcvtdq2ps  (%eax), %ymm2
+// CHECK: encoding: [0xc5,0xfc,0x5b,0x10]
+          vcvtdq2ps  (%eax), %ymm2
+
+// CHECK: vcvtps2dq  %ymm2, %ymm5
+// CHECK: encoding: [0xc5,0xfd,0x5b,0xea]
+          vcvtps2dq  %ymm2, %ymm5
+
+// CHECK: vcvtps2dq  (%eax), %ymm5
+// CHECK: encoding: [0xc5,0xfd,0x5b,0x28]
+          vcvtps2dq  (%eax), %ymm5
+
+// CHECK: vcvttps2dq  %ymm2, %ymm5
+// CHECK: encoding: [0xc5,0xfe,0x5b,0xea]
+          vcvttps2dq  %ymm2, %ymm5
+
+// CHECK: vcvttps2dq  (%eax), %ymm5
+// CHECK: encoding: [0xc5,0xfe,0x5b,0x28]
+          vcvttps2dq  (%eax), %ymm5
+
+// CHECK: vcvttpd2dq  %xmm1, %xmm5
+// CHECK: encoding: [0xc5,0xf9,0xe6,0xe9]
+          vcvttpd2dq  %xmm1, %xmm5
+
+// CHECK: vcvttpd2dq  %ymm2, %xmm5
+// CHECK: encoding: [0xc5,0xfd,0xe6,0xea]
+          vcvttpd2dq  %ymm2, %xmm5
+
+// CHECK: vcvttpd2dqx  %xmm1, %xmm5
+// CHECK: encoding: [0xc5,0xf9,0xe6,0xe9]
+          vcvttpd2dqx  %xmm1, %xmm5
+
+// CHECK: vcvttpd2dqx  (%eax), %xmm1
+// CHECK: encoding: [0xc5,0xf9,0xe6,0x08]
+          vcvttpd2dqx  (%eax), %xmm1
+
+// CHECK: vcvttpd2dqy  %ymm2, %xmm1
+// CHECK: encoding: [0xc5,0xfd,0xe6,0xca]
+          vcvttpd2dqy  %ymm2, %xmm1
+
+// CHECK: vcvttpd2dqy  (%eax), %xmm1
+// CHECK: encoding: [0xc5,0xfd,0xe6,0x08]
+          vcvttpd2dqy  (%eax), %xmm1
+
+// CHECK: vcvtpd2ps  %ymm2, %xmm5
+// CHECK: encoding: [0xc5,0xfd,0x5a,0xea]
+          vcvtpd2ps  %ymm2, %xmm5
+
+// CHECK: vcvtpd2psx  %xmm1, %xmm5
+// CHECK: encoding: [0xc5,0xf9,0x5a,0xe9]
+          vcvtpd2psx  %xmm1, %xmm5
+
+// CHECK: vcvtpd2psx  (%eax), %xmm1
+// CHECK: encoding: [0xc5,0xf9,0x5a,0x08]
+          vcvtpd2psx  (%eax), %xmm1
+
+// CHECK: vcvtpd2psy  %ymm2, %xmm1
+// CHECK: encoding: [0xc5,0xfd,0x5a,0xca]
+          vcvtpd2psy  %ymm2, %xmm1
+
+// CHECK: vcvtpd2psy  (%eax), %xmm1
+// CHECK: encoding: [0xc5,0xfd,0x5a,0x08]
+          vcvtpd2psy  (%eax), %xmm1
+
+// CHECK: vcvtpd2dq  %ymm2, %xmm5
+// CHECK: encoding: [0xc5,0xff,0xe6,0xea]
+          vcvtpd2dq  %ymm2, %xmm5
+
+// CHECK: vcvtpd2dqy  %ymm2, %xmm1
+// CHECK: encoding: [0xc5,0xff,0xe6,0xca]
+          vcvtpd2dqy  %ymm2, %xmm1
+
+// CHECK: vcvtpd2dqy  (%eax), %xmm1
+// CHECK: encoding: [0xc5,0xff,0xe6,0x08]
+          vcvtpd2dqy  (%eax), %xmm1
+
+// CHECK: vcvtpd2dqx  %xmm1, %xmm5
+// CHECK: encoding: [0xc5,0xfb,0xe6,0xe9]
+          vcvtpd2dqx  %xmm1, %xmm5
+
+// CHECK: vcvtpd2dqx  (%eax), %xmm1
+// CHECK: encoding: [0xc5,0xfb,0xe6,0x08]
+          vcvtpd2dqx  (%eax), %xmm1
+
+// CHECK: vcmpps  $0, %ymm1, %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x00]
+          vcmpeqps %ymm1, %ymm2, %ymm3
+
+// CHECK: vcmpps  $2, %ymm1, %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x02]
+          vcmpleps %ymm1, %ymm2, %ymm3
+
+// CHECK: vcmpps  $1, %ymm1, %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x01]
+          vcmpltps %ymm1, %ymm2, %ymm3
+
+// CHECK: vcmpps  $4, %ymm1, %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x04]
+          vcmpneqps %ymm1, %ymm2, %ymm3
+
+// CHECK: vcmpps  $6, %ymm1, %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x06]
+          vcmpnleps %ymm1, %ymm2, %ymm3
+
+// CHECK: vcmpps  $5, %ymm1, %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x05]
+          vcmpnltps %ymm1, %ymm2, %ymm3
+
+// CHECK: vcmpps  $7, %ymm1, %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x07]
+          vcmpordps %ymm1, %ymm2, %ymm3
+
+// CHECK: vcmpps  $3, %ymm1, %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x03]
+          vcmpunordps %ymm1, %ymm2, %ymm3
+
+// CHECK: vcmpps  $0, -4(%ebx,%ecx,8), %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xec,0xc2,0x5c,0xcb,0xfc,0x00]
+          vcmpeqps -4(%ebx,%ecx,8), %ymm2, %ymm3
+
+// CHECK: vcmpps  $2, -4(%ebx,%ecx,8), %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xec,0xc2,0x5c,0xcb,0xfc,0x02]
+          vcmpleps -4(%ebx,%ecx,8), %ymm2, %ymm3
+
+// CHECK: vcmpps  $1, -4(%ebx,%ecx,8), %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xec,0xc2,0x5c,0xcb,0xfc,0x01]
+          vcmpltps -4(%ebx,%ecx,8), %ymm2, %ymm3
+
+// CHECK: vcmpps  $4, -4(%ebx,%ecx,8), %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xec,0xc2,0x5c,0xcb,0xfc,0x04]
+          vcmpneqps -4(%ebx,%ecx,8), %ymm2, %ymm3
+
+// CHECK: vcmpps  $6, -4(%ebx,%ecx,8), %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xec,0xc2,0x5c,0xcb,0xfc,0x06]
+          vcmpnleps -4(%ebx,%ecx,8), %ymm2, %ymm3
+
+// CHECK: vcmpps  $5, -4(%ebx,%ecx,8), %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xec,0xc2,0x5c,0xcb,0xfc,0x05]
+          vcmpnltps -4(%ebx,%ecx,8), %ymm2, %ymm3
+
+// CHECK: vcmpps  $7, -4(%ebx,%ecx,8), %ymm6, %ymm2
+// CHECK: encoding: [0xc5,0xcc,0xc2,0x54,0xcb,0xfc,0x07]
+          vcmpordps -4(%ebx,%ecx,8), %ymm6, %ymm2
+
+// CHECK: vcmpps  $3, -4(%ebx,%ecx,8), %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xec,0xc2,0x5c,0xcb,0xfc,0x03]
+          vcmpunordps -4(%ebx,%ecx,8), %ymm2, %ymm3
+
+// CHECK: vcmppd  $0, %ymm1, %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xed,0xc2,0xd9,0x00]
+          vcmpeqpd %ymm1, %ymm2, %ymm3
+
+// CHECK: vcmppd  $2, %ymm1, %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xed,0xc2,0xd9,0x02]
+          vcmplepd %ymm1, %ymm2, %ymm3
+
+// CHECK: vcmppd  $1, %ymm1, %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xed,0xc2,0xd9,0x01]
+          vcmpltpd %ymm1, %ymm2, %ymm3
+
+// CHECK: vcmppd  $4, %ymm1, %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xed,0xc2,0xd9,0x04]
+          vcmpneqpd %ymm1, %ymm2, %ymm3
+
+// CHECK: vcmppd  $6, %ymm1, %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xed,0xc2,0xd9,0x06]
+          vcmpnlepd %ymm1, %ymm2, %ymm3
+
+// CHECK: vcmppd  $5, %ymm1, %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xed,0xc2,0xd9,0x05]
+          vcmpnltpd %ymm1, %ymm2, %ymm3
+
+// CHECK: vcmppd  $7, %ymm1, %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xed,0xc2,0xd9,0x07]
+          vcmpordpd %ymm1, %ymm2, %ymm3
+
+// CHECK: vcmppd  $3, %ymm1, %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xed,0xc2,0xd9,0x03]
+          vcmpunordpd %ymm1, %ymm2, %ymm3
+
+// CHECK: vcmppd  $0, -4(%ebx,%ecx,8), %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xed,0xc2,0x5c,0xcb,0xfc,0x00]
+          vcmpeqpd -4(%ebx,%ecx,8), %ymm2, %ymm3
+
+// CHECK: vcmppd  $2, -4(%ebx,%ecx,8), %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xed,0xc2,0x5c,0xcb,0xfc,0x02]
+          vcmplepd -4(%ebx,%ecx,8), %ymm2, %ymm3
+
+// CHECK: vcmppd  $1, -4(%ebx,%ecx,8), %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xed,0xc2,0x5c,0xcb,0xfc,0x01]
+          vcmpltpd -4(%ebx,%ecx,8), %ymm2, %ymm3
+
+// CHECK: vcmppd  $4, -4(%ebx,%ecx,8), %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xed,0xc2,0x5c,0xcb,0xfc,0x04]
+          vcmpneqpd -4(%ebx,%ecx,8), %ymm2, %ymm3
+
+// CHECK: vcmppd  $6, -4(%ebx,%ecx,8), %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xed,0xc2,0x5c,0xcb,0xfc,0x06]
+          vcmpnlepd -4(%ebx,%ecx,8), %ymm2, %ymm3
+
+// CHECK: vcmppd  $5, -4(%ebx,%ecx,8), %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xed,0xc2,0x5c,0xcb,0xfc,0x05]
+          vcmpnltpd -4(%ebx,%ecx,8), %ymm2, %ymm3
+
+// CHECK: vcmppd  $7, -4(%ebx,%ecx,8), %ymm6, %ymm2
+// CHECK: encoding: [0xc5,0xcd,0xc2,0x54,0xcb,0xfc,0x07]
+          vcmpordpd -4(%ebx,%ecx,8), %ymm6, %ymm2
+
+// CHECK: vcmppd  $3, -4(%ebx,%ecx,8), %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xed,0xc2,0x5c,0xcb,0xfc,0x03]
+          vcmpunordpd -4(%ebx,%ecx,8), %ymm2, %ymm3
+
+// CHECK: vcmpps  $8, %ymm1, %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x08]
+          vcmpeq_uqps %ymm1, %ymm2, %ymm3
+
+// CHECK: vcmpps  $9, %ymm1, %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x09]
+          vcmpngeps %ymm1, %ymm2, %ymm3
+
+// CHECK: vcmpps  $10, %ymm1, %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x0a]
+          vcmpngtps %ymm1, %ymm2, %ymm3
+
+// CHECK: vcmpps  $11, %ymm1, %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x0b]
+          vcmpfalseps %ymm1, %ymm2, %ymm3
+
+// CHECK: vcmpps  $12, %ymm1, %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x0c]
+          vcmpneq_oqps %ymm1, %ymm2, %ymm3
+
+// CHECK: vcmpps  $13, %ymm1, %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x0d]
+          vcmpgeps %ymm1, %ymm2, %ymm3
+
+// CHECK: vcmpps  $14, %ymm1, %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x0e]
+          vcmpgtps %ymm1, %ymm2, %ymm3
+
+// CHECK: vcmpps  $15, %ymm1, %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x0f]
+          vcmptrueps %ymm1, %ymm2, %ymm3
+
+// CHECK: vcmpps  $16, %ymm1, %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x10]
+          vcmpeq_osps %ymm1, %ymm2, %ymm3
+
+// CHECK: vcmpps  $17, %ymm1, %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x11]
+          vcmplt_oqps %ymm1, %ymm2, %ymm3
+
+// CHECK: vcmpps  $18, %ymm1, %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x12]
+          vcmple_oqps %ymm1, %ymm2, %ymm3
+
+// CHECK: vcmpps  $19, %ymm1, %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x13]
+          vcmpunord_sps %ymm1, %ymm2, %ymm3
+
+// CHECK: vcmpps  $20, %ymm1, %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x14]
+          vcmpneq_usps %ymm1, %ymm2, %ymm3
+
+// CHECK: vcmpps  $21, %ymm1, %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x15]
+          vcmpnlt_uqps %ymm1, %ymm2, %ymm3
+
+// CHECK: vcmpps  $22, %ymm1, %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x16]
+          vcmpnle_uqps %ymm1, %ymm2, %ymm3
+
+// CHECK: vcmpps  $23, %ymm1, %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x17]
+          vcmpord_sps %ymm1, %ymm2, %ymm3
+
+// CHECK: vcmpps  $24, %ymm1, %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x18]
+          vcmpeq_usps %ymm1, %ymm2, %ymm3
+
+// CHECK: vcmpps  $25, %ymm1, %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x19]
+          vcmpnge_uqps %ymm1, %ymm2, %ymm3
+
+// CHECK: vcmpps  $26, %ymm1, %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x1a]
+          vcmpngt_uqps %ymm1, %ymm2, %ymm3
+
+// CHECK: vcmpps  $27, %ymm1, %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x1b]
+          vcmpfalse_osps %ymm1, %ymm2, %ymm3
+
+// CHECK: vcmpps  $28, %ymm1, %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x1c]
+          vcmpneq_osps %ymm1, %ymm2, %ymm3
+
+// CHECK: vcmpps  $29, %ymm1, %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x1d]
+          vcmpge_oqps %ymm1, %ymm2, %ymm3
+
+// CHECK: vcmpps  $30, %ymm1, %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x1e]
+          vcmpgt_oqps %ymm1, %ymm2, %ymm3
+
+// CHECK: vcmpps  $31, %ymm1, %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x1f]
+          vcmptrue_usps %ymm1, %ymm2, %ymm3
+
diff --git a/test/MC/AsmParser/X86/x86_64-encoding.s b/test/MC/AsmParser/X86/x86_64-encoding.s
index 4efcbc7..0fce592 100644
--- a/test/MC/AsmParser/X86/x86_64-encoding.s
+++ b/test/MC/AsmParser/X86/x86_64-encoding.s
@@ -2672,3 +2672,331 @@ pshufb	CPI1_0(%rip), %xmm1
 // CHECK: encoding: [0xc5,0x1d,0x55,0x54,0xcb,0xfc]
           vandnpd  -4(%rbx,%rcx,8), %ymm12, %ymm10
 
+// CHECK: vcvtps2pd  %xmm13, %ymm12
+// CHECK: encoding: [0xc4,0x41,0x7c,0x5a,0xe5]
+          vcvtps2pd  %xmm13, %ymm12
+
+// CHECK: vcvtps2pd  (%rax), %ymm12
+// CHECK: encoding: [0xc5,0x7c,0x5a,0x20]
+          vcvtps2pd  (%rax), %ymm12
+
+// CHECK: vcvtdq2pd  %xmm13, %ymm12
+// CHECK: encoding: [0xc4,0x41,0x7e,0xe6,0xe5]
+          vcvtdq2pd  %xmm13, %ymm12
+
+// CHECK: vcvtdq2pd  (%rax), %ymm12
+// CHECK: encoding: [0xc5,0x7e,0xe6,0x20]
+          vcvtdq2pd  (%rax), %ymm12
+
+// CHECK: vcvtdq2ps  %ymm12, %ymm10
+// CHECK: encoding: [0xc4,0x41,0x7c,0x5b,0xd4]
+          vcvtdq2ps  %ymm12, %ymm10
+
+// CHECK: vcvtdq2ps  (%rax), %ymm12
+// CHECK: encoding: [0xc5,0x7c,0x5b,0x20]
+          vcvtdq2ps  (%rax), %ymm12
+
+// CHECK: vcvtps2dq  %ymm12, %ymm10
+// CHECK: encoding: [0xc4,0x41,0x7d,0x5b,0xd4]
+          vcvtps2dq  %ymm12, %ymm10
+
+// CHECK: vcvtps2dq  (%rax), %ymm10
+// CHECK: encoding: [0xc5,0x7d,0x5b,0x10]
+          vcvtps2dq  (%rax), %ymm10
+
+// CHECK: vcvttps2dq  %ymm12, %ymm10
+// CHECK: encoding: [0xc4,0x41,0x7e,0x5b,0xd4]
+          vcvttps2dq  %ymm12, %ymm10
+
+// CHECK: vcvttps2dq  (%rax), %ymm10
+// CHECK: encoding: [0xc5,0x7e,0x5b,0x10]
+          vcvttps2dq  (%rax), %ymm10
+
+// CHECK: vcvttpd2dq  %xmm11, %xmm10
+// CHECK: encoding: [0xc4,0x41,0x79,0xe6,0xd3]
+          vcvttpd2dq  %xmm11, %xmm10
+
+// CHECK: vcvttpd2dq  %ymm12, %xmm10
+// CHECK: encoding: [0xc4,0x41,0x7d,0xe6,0xd4]
+          vcvttpd2dq  %ymm12, %xmm10
+
+// CHECK: vcvttpd2dqx  %xmm11, %xmm10
+// CHECK: encoding: [0xc4,0x41,0x79,0xe6,0xd3]
+          vcvttpd2dqx  %xmm11, %xmm10
+
+// CHECK: vcvttpd2dqx  (%rax), %xmm11
+// CHECK: encoding: [0xc5,0x79,0xe6,0x18]
+          vcvttpd2dqx  (%rax), %xmm11
+
+// CHECK: vcvttpd2dqy  %ymm12, %xmm11
+// CHECK: encoding: [0xc4,0x41,0x7d,0xe6,0xdc]
+          vcvttpd2dqy  %ymm12, %xmm11
+
+// CHECK: vcvttpd2dqy  (%rax), %xmm11
+// CHECK: encoding: [0xc5,0x7d,0xe6,0x18]
+          vcvttpd2dqy  (%rax), %xmm11
+
+// CHECK: vcvtpd2ps  %ymm12, %xmm10
+// CHECK: encoding: [0xc4,0x41,0x7d,0x5a,0xd4]
+          vcvtpd2ps  %ymm12, %xmm10
+
+// CHECK: vcvtpd2psx  %xmm11, %xmm10
+// CHECK: encoding: [0xc4,0x41,0x79,0x5a,0xd3]
+          vcvtpd2psx  %xmm11, %xmm10
+
+// CHECK: vcvtpd2psx  (%rax), %xmm11
+// CHECK: encoding: [0xc5,0x79,0x5a,0x18]
+          vcvtpd2psx  (%rax), %xmm11
+
+// CHECK: vcvtpd2psy  %ymm12, %xmm11
+// CHECK: encoding: [0xc4,0x41,0x7d,0x5a,0xdc]
+          vcvtpd2psy  %ymm12, %xmm11
+
+// CHECK: vcvtpd2psy  (%rax), %xmm11
+// CHECK: encoding: [0xc5,0x7d,0x5a,0x18]
+          vcvtpd2psy  (%rax), %xmm11
+
+// CHECK: vcvtpd2dq  %ymm12, %xmm10
+// CHECK: encoding: [0xc4,0x41,0x7f,0xe6,0xd4]
+          vcvtpd2dq  %ymm12, %xmm10
+
+// CHECK: vcvtpd2dqy  %ymm12, %xmm11
+// CHECK: encoding: [0xc4,0x41,0x7f,0xe6,0xdc]
+          vcvtpd2dqy  %ymm12, %xmm11
+
+// CHECK: vcvtpd2dqy  (%rax), %xmm11
+// CHECK: encoding: [0xc5,0x7f,0xe6,0x18]
+          vcvtpd2dqy  (%rax), %xmm11
+
+// CHECK: vcvtpd2dqx  %xmm11, %xmm10
+// CHECK: encoding: [0xc4,0x41,0x7b,0xe6,0xd3]
+          vcvtpd2dqx  %xmm11, %xmm10
+
+// CHECK: vcvtpd2dqx  (%rax), %xmm11
+// CHECK: encoding: [0xc5,0x7b,0xe6,0x18]
+          vcvtpd2dqx  (%rax), %xmm11
+
+// CHECK: vcmpps  $0, %ymm11, %ymm12, %ymm13
+// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x00]
+          vcmpeqps %ymm11, %ymm12, %ymm13
+
+// CHECK: vcmpps  $2, %ymm11, %ymm12, %ymm13
+// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x02]
+          vcmpleps %ymm11, %ymm12, %ymm13
+
+// CHECK: vcmpps  $1, %ymm11, %ymm12, %ymm13
+// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x01]
+          vcmpltps %ymm11, %ymm12, %ymm13
+
+// CHECK: vcmpps  $4, %ymm11, %ymm12, %ymm13
+// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x04]
+          vcmpneqps %ymm11, %ymm12, %ymm13
+
+// CHECK: vcmpps  $6, %ymm11, %ymm12, %ymm13
+// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x06]
+          vcmpnleps %ymm11, %ymm12, %ymm13
+
+// CHECK: vcmpps  $5, %ymm11, %ymm12, %ymm13
+// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x05]
+          vcmpnltps %ymm11, %ymm12, %ymm13
+
+// CHECK: vcmpps  $7, %ymm11, %ymm12, %ymm13
+// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x07]
+          vcmpordps %ymm11, %ymm12, %ymm13
+
+// CHECK: vcmpps  $3, %ymm11, %ymm12, %ymm13
+// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x03]
+          vcmpunordps %ymm11, %ymm12, %ymm13
+
+// CHECK: vcmpps  $0, -4(%rbx,%rcx,8), %ymm12, %ymm13
+// CHECK: encoding: [0xc5,0x1c,0xc2,0x6c,0xcb,0xfc,0x00]
+          vcmpeqps -4(%rbx,%rcx,8), %ymm12, %ymm13
+
+// CHECK: vcmpps  $2, -4(%rbx,%rcx,8), %ymm12, %ymm13
+// CHECK: encoding: [0xc5,0x1c,0xc2,0x6c,0xcb,0xfc,0x02]
+          vcmpleps -4(%rbx,%rcx,8), %ymm12, %ymm13
+
+// CHECK: vcmpps  $1, -4(%rbx,%rcx,8), %ymm12, %ymm13
+// CHECK: encoding: [0xc5,0x1c,0xc2,0x6c,0xcb,0xfc,0x01]
+          vcmpltps -4(%rbx,%rcx,8), %ymm12, %ymm13
+
+// CHECK: vcmpps  $4, -4(%rbx,%rcx,8), %ymm12, %ymm13
+// CHECK: encoding: [0xc5,0x1c,0xc2,0x6c,0xcb,0xfc,0x04]
+          vcmpneqps -4(%rbx,%rcx,8), %ymm12, %ymm13
+
+// CHECK: vcmpps  $6, -4(%rbx,%rcx,8), %ymm12, %ymm13
+// CHECK: encoding: [0xc5,0x1c,0xc2,0x6c,0xcb,0xfc,0x06]
+          vcmpnleps -4(%rbx,%rcx,8), %ymm12, %ymm13
+
+// CHECK: vcmpps  $5, -4(%rbx,%rcx,8), %ymm12, %ymm13
+// CHECK: encoding: [0xc5,0x1c,0xc2,0x6c,0xcb,0xfc,0x05]
+          vcmpnltps -4(%rbx,%rcx,8), %ymm12, %ymm13
+
+// CHECK: vcmpps  $7, -4(%rbx,%rcx,8), %ymm6, %ymm12
+// CHECK: encoding: [0xc5,0x4c,0xc2,0x64,0xcb,0xfc,0x07]
+          vcmpordps -4(%rbx,%rcx,8), %ymm6, %ymm12
+
+// CHECK: vcmpps  $3, -4(%rbx,%rcx,8), %ymm12, %ymm13
+// CHECK: encoding: [0xc5,0x1c,0xc2,0x6c,0xcb,0xfc,0x03]
+          vcmpunordps -4(%rbx,%rcx,8), %ymm12, %ymm13
+
+// CHECK: vcmppd  $0, %ymm11, %ymm12, %ymm13
+// CHECK: encoding: [0xc4,0x41,0x1d,0xc2,0xeb,0x00]
+          vcmpeqpd %ymm11, %ymm12, %ymm13
+
+// CHECK: vcmppd  $2, %ymm11, %ymm12, %ymm13
+// CHECK: encoding: [0xc4,0x41,0x1d,0xc2,0xeb,0x02]
+          vcmplepd %ymm11, %ymm12, %ymm13
+
+// CHECK: vcmppd  $1, %ymm11, %ymm12, %ymm13
+// CHECK: encoding: [0xc4,0x41,0x1d,0xc2,0xeb,0x01]
+          vcmpltpd %ymm11, %ymm12, %ymm13
+
+// CHECK: vcmppd  $4, %ymm11, %ymm12, %ymm13
+// CHECK: encoding: [0xc4,0x41,0x1d,0xc2,0xeb,0x04]
+          vcmpneqpd %ymm11, %ymm12, %ymm13
+
+// CHECK: vcmppd  $6, %ymm11, %ymm12, %ymm13
+// CHECK: encoding: [0xc4,0x41,0x1d,0xc2,0xeb,0x06]
+          vcmpnlepd %ymm11, %ymm12, %ymm13
+
+// CHECK: vcmppd  $5, %ymm11, %ymm12, %ymm13
+// CHECK: encoding: [0xc4,0x41,0x1d,0xc2,0xeb,0x05]
+          vcmpnltpd %ymm11, %ymm12, %ymm13
+
+// CHECK: vcmppd  $7, %ymm11, %ymm12, %ymm13
+// CHECK: encoding: [0xc4,0x41,0x1d,0xc2,0xeb,0x07]
+          vcmpordpd %ymm11, %ymm12, %ymm13
+
+// CHECK: vcmppd  $3, %ymm11, %ymm12, %ymm13
+// CHECK: encoding: [0xc4,0x41,0x1d,0xc2,0xeb,0x03]
+          vcmpunordpd %ymm11, %ymm12, %ymm13
+
+// CHECK: vcmppd  $0, -4(%rbx,%rcx,8), %ymm12, %ymm13
+// CHECK: encoding: [0xc5,0x1d,0xc2,0x6c,0xcb,0xfc,0x00]
+          vcmpeqpd -4(%rbx,%rcx,8), %ymm12, %ymm13
+
+// CHECK: vcmppd  $2, -4(%rbx,%rcx,8), %ymm12, %ymm13
+// CHECK: encoding: [0xc5,0x1d,0xc2,0x6c,0xcb,0xfc,0x02]
+          vcmplepd -4(%rbx,%rcx,8), %ymm12, %ymm13
+
+// CHECK: vcmppd  $1, -4(%rbx,%rcx,8), %ymm12, %ymm13
+// CHECK: encoding: [0xc5,0x1d,0xc2,0x6c,0xcb,0xfc,0x01]
+          vcmpltpd -4(%rbx,%rcx,8), %ymm12, %ymm13
+
+// CHECK: vcmppd  $4, -4(%rbx,%rcx,8), %ymm12, %ymm13
+// CHECK: encoding: [0xc5,0x1d,0xc2,0x6c,0xcb,0xfc,0x04]
+          vcmpneqpd -4(%rbx,%rcx,8), %ymm12, %ymm13
+
+// CHECK: vcmppd  $6, -4(%rbx,%rcx,8), %ymm12, %ymm13
+// CHECK: encoding: [0xc5,0x1d,0xc2,0x6c,0xcb,0xfc,0x06]
+          vcmpnlepd -4(%rbx,%rcx,8), %ymm12, %ymm13
+
+// CHECK: vcmppd  $5, -4(%rbx,%rcx,8), %ymm12, %ymm13
+// CHECK: encoding: [0xc5,0x1d,0xc2,0x6c,0xcb,0xfc,0x05]
+          vcmpnltpd -4(%rbx,%rcx,8), %ymm12, %ymm13
+
+// CHECK: vcmppd  $7, -4(%rbx,%rcx,8), %ymm6, %ymm12
+// CHECK: encoding: [0xc5,0x4d,0xc2,0x64,0xcb,0xfc,0x07]
+          vcmpordpd -4(%rbx,%rcx,8), %ymm6, %ymm12
+
+// CHECK: vcmppd  $3, -4(%rbx,%rcx,8), %ymm12, %ymm13
+// CHECK: encoding: [0xc5,0x1d,0xc2,0x6c,0xcb,0xfc,0x03]
+          vcmpunordpd -4(%rbx,%rcx,8), %ymm12, %ymm13
+
+// CHECK: vcmpps  $8, %ymm11, %ymm12, %ymm13
+// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x08]
+          vcmpeq_uqps %ymm11, %ymm12, %ymm13
+
+// CHECK: vcmpps  $9, %ymm11, %ymm12, %ymm13
+// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x09]
+          vcmpngeps %ymm11, %ymm12, %ymm13
+
+// CHECK: vcmpps  $10, %ymm11, %ymm12, %ymm13
+// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x0a]
+          vcmpngtps %ymm11, %ymm12, %ymm13
+
+// CHECK: vcmpps  $11, %ymm11, %ymm12, %ymm13
+// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x0b]
+          vcmpfalseps %ymm11, %ymm12, %ymm13
+
+// CHECK: vcmpps  $12, %ymm11, %ymm12, %ymm13
+// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x0c]
+          vcmpneq_oqps %ymm11, %ymm12, %ymm13
+
+// CHECK: vcmpps  $13, %ymm11, %ymm12, %ymm13
+// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x0d]
+          vcmpgeps %ymm11, %ymm12, %ymm13
+
+// CHECK: vcmpps  $14, %ymm11, %ymm12, %ymm13
+// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x0e]
+          vcmpgtps %ymm11, %ymm12, %ymm13
+
+// CHECK: vcmpps  $15, %ymm11, %ymm12, %ymm13
+// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x0f]
+          vcmptrueps %ymm11, %ymm12, %ymm13
+
+// CHECK: vcmpps  $16, %ymm11, %ymm12, %ymm13
+// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x10]
+          vcmpeq_osps %ymm11, %ymm12, %ymm13
+
+// CHECK: vcmpps  $17, %ymm11, %ymm12, %ymm13
+// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x11]
+          vcmplt_oqps %ymm11, %ymm12, %ymm13
+
+// CHECK: vcmpps  $18, %ymm11, %ymm12, %ymm13
+// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x12]
+          vcmple_oqps %ymm11, %ymm12, %ymm13
+
+// CHECK: vcmpps  $19, %ymm11, %ymm12, %ymm13
+// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x13]
+          vcmpunord_sps %ymm11, %ymm12, %ymm13
+
+// CHECK: vcmpps  $20, %ymm11, %ymm12, %ymm13
+// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x14]
+          vcmpneq_usps %ymm11, %ymm12, %ymm13
+
+// CHECK: vcmpps  $21, %ymm11, %ymm12, %ymm13
+// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x15]
+          vcmpnlt_uqps %ymm11, %ymm12, %ymm13
+
+// CHECK: vcmpps  $22, %ymm11, %ymm12, %ymm13
+// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x16]
+          vcmpnle_uqps %ymm11, %ymm12, %ymm13
+
+// CHECK: vcmpps  $23, %ymm11, %ymm12, %ymm13
+// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x17]
+          vcmpord_sps %ymm11, %ymm12, %ymm13
+
+// CHECK: vcmpps  $24, %ymm11, %ymm12, %ymm13
+// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x18]
+          vcmpeq_usps %ymm11, %ymm12, %ymm13
+
+// CHECK: vcmpps  $25, %ymm11, %ymm12, %ymm13
+// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x19]
+          vcmpnge_uqps %ymm11, %ymm12, %ymm13
+
+// CHECK: vcmpps  $26, %ymm11, %ymm12, %ymm13
+// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x1a]
+          vcmpngt_uqps %ymm11, %ymm12, %ymm13
+
+// CHECK: vcmpps  $27, %ymm11, %ymm12, %ymm13
+// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x1b]
+          vcmpfalse_osps %ymm11, %ymm12, %ymm13
+
+// CHECK: vcmpps  $28, %ymm11, %ymm12, %ymm13
+// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x1c]
+          vcmpneq_osps %ymm11, %ymm12, %ymm13
+
+// CHECK: vcmpps  $29, %ymm11, %ymm12, %ymm13
+// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x1d]
+          vcmpge_oqps %ymm11, %ymm12, %ymm13
+
+// CHECK: vcmpps  $30, %ymm11, %ymm12, %ymm13
+// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x1e]
+          vcmpgt_oqps %ymm11, %ymm12, %ymm13
+
+// CHECK: vcmpps  $31, %ymm11, %ymm12, %ymm13
+// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x1f]
+          vcmptrue_usps %ymm11, %ymm12, %ymm13
+
diff --git a/test/Makefile b/test/Makefile
index f816744..f6830e6 100644
--- a/test/Makefile
+++ b/test/Makefile
@@ -202,3 +202,7 @@ Unit/lit.site.cfg: $(PROJ_OBJ_DIR)/Unit/.dir FORCE
 	     -e "s#@ENABLE_SHARED@#$(ENABLE_SHARED)#g" \
 	     -e "s#@SHLIBPATH_VAR@#$(SHLIBPATH_VAR)#g" \
 	     $(PROJ_SRC_DIR)/Unit/lit.site.cfg.in > $@
+
+# Shortcut target: re-run check-lit with LIT_ARGS='-j16 -s' (16 jobs, succinct output); uses $(MAKE) so make flags/jobserver propagate.
+chris-lit:
+	$(MAKE) check-lit LIT_ARGS='-j16 -s'
diff --git a/test/Transforms/InstCombine/bit-checks.ll b/test/Transforms/InstCombine/bit-checks.ll
new file mode 100644
index 0000000..f97fb45
--- /dev/null
+++ b/test/Transforms/InstCombine/bit-checks.ll
@@ -0,0 +1,15 @@
+; This test makes sure that instcombine eliminates the bit-test compares (%tobool, %tobool3).
+;
+; RUN: opt < %s -instcombine -S | \
+; RUN:    not grep {tobool}
+; END.
+define i32 @main(i32 %argc, i8** %argv) nounwind ssp {
+entry:
+  %and = and i32 %argc, 1                         ; <i32> [#uses=1]
+  %tobool = icmp ne i32 %and, 0                   ; <i1> [#uses=1]
+  %and2 = and i32 %argc, 2                        ; <i32> [#uses=1]
+  %tobool3 = icmp ne i32 %and2, 0                 ; <i1> [#uses=1]
+  %or.cond = and i1 %tobool, %tobool3             ; <i1> [#uses=1]
+  %retval.0 = select i1 %or.cond, i32 2, i32 1    ; <i32> [#uses=1]
+  ret i32 %retval.0
+}
diff --git a/test/Transforms/JumpThreading/crash.ll b/test/Transforms/JumpThreading/crash.ll
index 480b4a0..f0fc61e 100644
--- a/test/Transforms/JumpThreading/crash.ll
+++ b/test/Transforms/JumpThreading/crash.ll
@@ -387,3 +387,50 @@ if.end:                                           ; preds = %land.end69
   ret void
 }
 
+; PR7647
+define void @test15() nounwind {
+entry:
+  ret void
+  
+if.then237:
+  br label %lbl_664
+
+lbl_596:                                          ; preds = %lbl_664, %for.end37
+  volatile store i64 undef, i64* undef, align 4
+  br label %for.cond111
+
+for.cond111:                                      ; preds = %safe_sub_func_int64_t_s_s.exit, %lbl_596
+  %storemerge = phi i8 [ undef, %cond.true.i100 ], [ 22, %lbl_596 ] ; <i8> [#uses=1]
+  %l_678.5 = phi i64 [ %l_678.3, %cond.true.i100 ], [ undef, %lbl_596 ] ; <i64> [#uses=2]
+  %cmp114 = icmp slt i8 %storemerge, -2           ; <i1> [#uses=1]
+  br i1 %cmp114, label %lbl_664, label %if.end949
+
+lbl_664:                                          ; preds = %for.end1058, %if.then237, %for.cond111
+  %l_678.3 = phi i64 [ %l_678.5, %for.cond111 ], [ %l_678.2, %for.cond1035 ], [ 5, %if.then237 ] ; <i64> [#uses=1]
+  %tobool118 = icmp eq i32 undef, 0               ; <i1> [#uses=1]
+  br i1 %tobool118, label %cond.true.i100, label %lbl_596
+
+cond.true.i100:                                   ; preds = %for.inc120
+  br label %for.cond111
+
+lbl_709:
+  br label %if.end949
+  
+for.cond603:                                      ; preds = %for.body607, %if.end336
+  br i1 undef, label %for.cond603, label %if.end949
+
+if.end949:                                        ; preds = %for.cond603, %lbl_709, %for.cond111
+  %l_678.2 = phi i64 [ %l_678.5, %for.cond111 ], [ undef, %lbl_709 ], [ 5, %for.cond603 ] ; <i64> [#uses=1]
+  br label %for.body1016
+
+for.body1016:                                     ; preds = %for.cond1012
+  br label %for.body1016
+
+for.cond1035:                                     ; preds = %for.inc1055, %if.then1026
+  br i1 undef, label %for.cond1040, label %lbl_664
+
+for.cond1040:                                     ; preds = %for.body1044, %for.cond1035
+  ret void
+}
+
+
diff --git a/tools/Makefile b/tools/Makefile
index 9d2e576..9bc74fe 100644
--- a/tools/Makefile
+++ b/tools/Makefile
@@ -49,9 +49,4 @@ ifeq ($(ENABLE_PIC),1)
   endif
 endif
 
-# Don't build edis if we explicitly disabled it.
-ifeq ($(DISABLE_EDIS),1)
-  PARALLEL_DIRS := $(filter-out edis, $(PARALLEL_DIRS))
-endif
-
 include $(LEVEL)/Makefile.common
diff --git a/tools/edis/Makefile b/tools/edis/Makefile
index 9151f62..0d2e26f 100644
--- a/tools/edis/Makefile
+++ b/tools/edis/Makefile
@@ -20,10 +20,12 @@ EXPORTED_SYMBOL_FILE = $(PROJ_SRC_DIR)/EnhancedDisassembly.exports
 include $(LEVEL)/Makefile.config
 
 ifeq ($(ENABLE_PIC),1)
+  ifneq ($(DISABLE_EDIS),1)
     ifneq ($(TARGET_OS), $(filter $(TARGET_OS), Cygwin MingW))
 	LINK_LIBS_IN_SHARED = 1
 	SHARED_LIBRARY = 1
     endif
+  endif
 endif
 
 LINK_COMPONENTS := $(TARGETS_TO_BUILD) x86asmprinter x86disassembler
diff --git a/unittests/ADT/APIntTest.cpp b/unittests/ADT/APIntTest.cpp
index d08e86a..0cb7996 100644
--- a/unittests/ADT/APIntTest.cpp
+++ b/unittests/ADT/APIntTest.cpp
@@ -20,8 +20,8 @@ namespace {
 TEST(APIntTest, ShiftLeftByZero) {
   APInt One = APInt::getNullValue(65) + 1;
   APInt Shl = One.shl(0);
-  EXPECT_EQ(true, Shl[0]);
-  EXPECT_EQ(false, Shl[1]);
+  EXPECT_TRUE(Shl[0]);
+  EXPECT_FALSE(Shl[1]);
 }
 
 TEST(APIntTest, i128_NegativeCount) {
diff --git a/unittests/VMCore/PassManagerTest.cpp b/unittests/VMCore/PassManagerTest.cpp
index 4b38910..cabfc44 100644
--- a/unittests/VMCore/PassManagerTest.cpp
+++ b/unittests/VMCore/PassManagerTest.cpp
@@ -105,8 +105,8 @@ namespace llvm {
       static bool finalized;
       int allocated;
       void run() {
-        EXPECT_EQ(true, initialized);
-        EXPECT_EQ(false, finalized);
+        EXPECT_TRUE(initialized);
+        EXPECT_FALSE(finalized);
         EXPECT_EQ(0, allocated);
         allocated++;
         runc++;
@@ -115,8 +115,8 @@ namespace llvm {
       static char ID;
       static void finishedOK(int run) {
         EXPECT_GT(runc, 0);
-        EXPECT_EQ(true, initialized);
-        EXPECT_EQ(true, finalized);
+        EXPECT_TRUE(initialized);
+        EXPECT_TRUE(finalized);
         EXPECT_EQ(run, runc);
       }
       PassTestBase() : P(&ID), allocated(0) {
@@ -140,12 +140,12 @@ namespace llvm {
     struct PassTest : public PassTestBase<P> {
     public:
       virtual bool doInitialization(T &t) {
-        EXPECT_EQ(false, PassTestBase<P>::initialized);
+        EXPECT_FALSE(PassTestBase<P>::initialized);
         PassTestBase<P>::initialized = true;
         return false;
       }
       virtual bool doFinalization(T &t) {
-        EXPECT_EQ(false, PassTestBase<P>::finalized);
+        EXPECT_FALSE(PassTestBase<P>::finalized);
         PassTestBase<P>::finalized = true;
         EXPECT_EQ(0, PassTestBase<P>::allocated);
         return false;
@@ -180,7 +180,7 @@ namespace llvm {
     public:
       LPass() {
         initcount = 0; fincount=0;
-        EXPECT_EQ(false, initialized);
+        EXPECT_FALSE(initialized);
       }
       static void finishedOK(int run, int finalized) {
         PassTestBase<LoopPass>::finishedOK(run);
@@ -222,7 +222,7 @@ namespace llvm {
         fin = 0;
       }
       virtual bool doInitialization(Module &M) {
-        EXPECT_EQ(false, initialized);
+        EXPECT_FALSE(initialized);
         initialized = true;
         return false;
       }
@@ -240,7 +240,7 @@ namespace llvm {
         return false;
       }
       virtual bool doFinalization(Module &M) {
-        EXPECT_EQ(false, finalized);
+        EXPECT_FALSE(finalized);
         finalized = true;
         EXPECT_EQ(0, allocated);
         return false;
diff --git a/utils/buildit/GNUmakefile b/utils/buildit/GNUmakefile
index 0f3b7eb..d17585f 100644
--- a/utils/buildit/GNUmakefile
+++ b/utils/buildit/GNUmakefile
@@ -32,7 +32,7 @@ DSTROOT = $(OBJROOT)/../dst
 
 #######################################################################
 
-PREFIX = /usr/local
+PREFIX = /Developer/usr/local
 
 # Unless assertions are forced on in the GMAKE command line, disable them.
 ifndef ENABLE_ASSERTIONS
@@ -70,7 +70,7 @@ install: $(OBJROOT) $(SYMROOT) $(DSTROOT)
 	    $(RC_ProjectSourceVersion) $(RC_ProjectSourceSubversion) 
 
 EmbeddedHosted:
-	$(MAKE) ARM_HOSTED_BUILD=yes install
+	$(MAKE) ARM_HOSTED_BUILD=yes PREFIX=/usr install
 
 # installhdrs does nothing, because the headers aren't useful until
 # the compiler is installed.
diff --git a/utils/buildit/build_llvm b/utils/buildit/build_llvm
index b466755..37ef16e 100755
--- a/utils/buildit/build_llvm
+++ b/utils/buildit/build_llvm
@@ -63,15 +63,6 @@ DIR=`pwd`
 DARWIN_VERS=`uname -r | sed 's/\..*//'`
 echo DARWIN_VERS = $DARWIN_VERS
 
-DEVELOPER_DIR="${DEVELOPER_DIR-Developer}"
-if [ "$ARM_HOSTED_BUILD" = yes ]; then
-    DT_HOME="$DEST_DIR/usr"
-else
-    DT_HOME="$DEST_DIR/$DEVELOPER_DIR/usr"
-fi
-
-DEST_ROOT="/$DEVELOPER_DIR$DEST_ROOT"
-
 ################################################################################
 # Run the build.
 
@@ -131,16 +122,14 @@ elif [ $DARWIN_VERS -gt 9 ]; then
 fi
 
 if [ "$ARM_HOSTED_BUILD" = yes ]; then
-  configure_prefix=$DT_HOME
   configure_opts="--enable-targets=arm --host=arm-apple-darwin10 \
                   --target=arm-apple-darwin10 --build=i686-apple-darwin10"
 else
-  configure_prefix=$DT_HOME/local
   configure_opts="--enable-targets=arm,x86,powerpc,cbe"
 fi
 
 if [ \! -f Makefile.config ]; then
-  $SRC_DIR/configure --prefix=$configure_prefix $configure_opts \
+  $SRC_DIR/configure --prefix=$DEST_DIR$DEST_ROOT $configure_opts \
     --enable-assertions=$LLVM_ASSERTIONS \
     --enable-optimized=$LLVM_OPTIMIZED \
     --disable-bindings \
@@ -254,7 +243,7 @@ if [ "x$LLVM_DEBUG" != "x1" ]; then
 fi
 
 # Copy over the tblgen utility.
-cp `find $DIR -name tblgen` $DT_HOME/local/bin
+cp `find $DIR -name tblgen` $DEST_DIR$DEST_ROOT/bin
 
 # Remove .dir files 
 cd $DEST_DIR$DEST_ROOT
@@ -274,7 +263,7 @@ else
 fi
 
 # The Hello dylib is an example of how to build a pass. No need to install it.
-rm $DEST_DIR$DEST_ROOT/lib/libLLVMHello.dylib
+rm $DEST_DIR$DEST_ROOT/lib/LLVMHello.dylib
 
 # Compress manpages
 MDIR=$DEST_DIR$DEST_ROOT/share/man/man1
@@ -324,12 +313,15 @@ find obj-* -name \*.\[chy\] -o -name \*.cpp -print \
 
 cd $DEST_DIR$DEST_ROOT
 if [ "$INSTALL_LIBLTO" = "yes" ]; then
+  DT_HOME="$DEST_DIR/Developer/usr"
   mkdir -p $DT_HOME/lib
   mv lib/libLTO.dylib $DT_HOME/lib/libLTO.dylib
 
   # Use '-l' to strip i386 modules. N.B. that flag doesn't work with kext or
   # PPC objects!
   strip -arch all -Sl $DT_HOME/lib/libLTO.dylib
+else
+  rm -f lib/libLTO.dylib
 fi
 rm -f lib/libLTO.a lib/libLTO.la
 
@@ -353,9 +345,9 @@ chgrp -h -R wheel $DEST_DIR
 chgrp -R wheel $DEST_DIR
 
 ################################################################################
-# Remove tar ball from docs directory
+# Remove the docs directory
 
-find $DEST_DIR -name html.tar.gz -exec rm {} \;
+rm -rf $DEST_DIR$DEST_ROOT/docs
 
 ################################################################################
 # symlinks so that B&I can find things
@@ -363,7 +355,7 @@ find $DEST_DIR -name html.tar.gz -exec rm {} \;
 if [ "$INSTALL_LIBLTO" = "yes" ]; then
   mkdir -p $DEST_DIR/usr/lib/
   cd $DEST_DIR/usr/lib && \
-    ln -s ../../$DEVELOPER_DIR/usr/lib/libLTO.dylib ./libLTO.dylib
+    ln -s ../../Developer/usr/lib/libLTO.dylib ./libLTO.dylib
 fi
 
 ################################################################################
author	rdivacky <rdivacky@FreeBSD.org>	2010-07-15 17:06:11 +0000
committer	rdivacky <rdivacky@FreeBSD.org>	2010-07-15 17:06:11 +0000
commit	c1c3262b63b1d5fbba6a7ad188f4e47d92c7840e (patch)
tree	5b6d391c72c9875f0065f0e772e872bc8544834b
parent	9112829d76cbb8e0c8ef51bbc2d7d1be48cd7b74 (diff)
download	FreeBSD-src-c1c3262b63b1d5fbba6a7ad188f4e47d92c7840e.zip FreeBSD-src-c1c3262b63b1d5fbba6a7ad188f4e47d92c7840e.tar.gz